corosync  2.4.2
wd.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2010-2012 Red Hat, Inc.
3  *
4  * All rights reserved.
5  *
6  * Author: Angus Salkeld <asalkeld@redhat.com>
7  *
8  * This software licensed under BSD license, the text of which follows:
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  * - Redistributions of source code must retain the above copyright notice,
14  * this list of conditions and the following disclaimer.
15  * - Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  * - Neither the name of the MontaVista Software, Inc. nor the names of its
19  * contributors may be used to endorse or promote products derived from this
20  * software without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32  * THE POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include <config.h>
36 
37 #include <unistd.h>
38 #include <fcntl.h>
39 #include <sys/ioctl.h>
40 #include <linux/types.h>
41 #include <linux/watchdog.h>
42 #include <sys/reboot.h>
43 
44 #include <corosync/corotypes.h>
45 #include <corosync/corodefs.h>
46 #include <corosync/coroapi.h>
47 #include <corosync/list.h>
48 #include <corosync/logsys.h>
49 #include <corosync/icmap.h>
50 #include "fsm.h"
51 
52 #include "service.h"
53 
54 typedef enum {
60 
61 struct resource {
63  char *recovery;
65  time_t last_updated;
66  struct cs_fsm fsm;
67 
69  uint64_t check_timeout;
71 };
72 
74 
75 /*
76  * Service Interfaces required by service_message_handler struct
77  */
78 static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api);
79 static int wd_exec_exit_fn (void);
80 static void wd_resource_check_fn (void* resource_ref);
81 
82 static struct corosync_api_v1 *api;
83 #define WD_DEFAULT_TIMEOUT_SEC 6
84 #define WD_DEFAULT_TIMEOUT_MS (WD_DEFAULT_TIMEOUT_SEC * CS_TIME_MS_IN_SEC)
85 #define WD_MIN_TIMEOUT_MS 500
86 #define WD_MAX_TIMEOUT_MS (120 * CS_TIME_MS_IN_SEC)
87 static uint32_t watchdog_timeout = WD_DEFAULT_TIMEOUT_SEC;
88 static uint64_t tickle_timeout = (WD_DEFAULT_TIMEOUT_MS / 2);
89 static int dog = -1;
90 static corosync_timer_handle_t wd_timer;
91 static int watchdog_ok = 1;
92 static char *watchdog_device = "/dev/watchdog";
93 
95  .name = "corosync watchdog service",
96  .id = WD_SERVICE,
97  .priority = 1,
98  .private_data_size = 0,
99  .flow_control = CS_LIB_FLOW_CONTROL_NOT_REQUIRED,
100  .lib_init_fn = NULL,
101  .lib_exit_fn = NULL,
102  .lib_engine = NULL,
103  .lib_engine_count = 0,
104  .exec_engine = NULL,
105  .exec_engine_count = 0,
106  .confchg_fn = NULL,
107  .exec_init_fn = wd_exec_init_fn,
108  .exec_exit_fn = wd_exec_exit_fn,
109  .exec_dump_fn = NULL
110 };
111 
112 static DECLARE_LIST_INIT (confchg_notify);
113 
114 /*
115  * F S M
116  */
117 static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data);
118 static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data);
119 
124 };
125 
129 };
130 
131 const char * wd_running_str = "running";
132 const char * wd_failed_str = "failed";
133 const char * wd_failure_str = "failure";
134 const char * wd_stopped_str = "stopped";
135 const char * wd_config_changed_str = "config_changed";
136 
138  { WD_S_STOPPED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_STOPPED, WD_S_RUNNING, -1} },
139  { WD_S_STOPPED, WD_E_FAILURE, NULL, {-1} },
140  { WD_S_RUNNING, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} },
141  { WD_S_RUNNING, WD_E_FAILURE, wd_resource_failed, {WD_S_FAILED, -1} },
142  { WD_S_FAILED, WD_E_CONFIG_CHANGED, wd_config_changed, {WD_S_RUNNING, WD_S_STOPPED, -1} },
143  { WD_S_FAILED, WD_E_FAILURE, NULL, {-1} },
144 };
145 
147 {
148  return (&wd_service_engine);
149 }
150 
151 static const char * wd_res_state_to_str(struct cs_fsm* fsm,
152  int32_t state)
153 {
154  switch (state) {
155  case WD_S_STOPPED:
156  return wd_stopped_str;
157  break;
158  case WD_S_RUNNING:
159  return wd_running_str;
160  break;
161  case WD_S_FAILED:
162  return wd_failed_str;
163  break;
164  }
165  return NULL;
166 }
167 
168 static const char * wd_res_event_to_str(struct cs_fsm* fsm,
169  int32_t event)
170 {
171  switch (event) {
172  case WD_E_CONFIG_CHANGED:
173  return wd_config_changed_str;
174  break;
175  case WD_E_FAILURE:
176  return wd_failure_str;
177  break;
178  }
179  return NULL;
180 }
181 
182 static void wd_fsm_cb (struct cs_fsm *fsm, int cb_event, int32_t curr_state,
183  int32_t next_state, int32_t fsm_event, void *data)
184 {
185  switch (cb_event) {
187  log_printf (LOGSYS_LEVEL_ERROR, "Fsm:%s could not find event \"%s\" in state \"%s\"",
188  fsm->name, fsm->event_to_str(fsm, fsm_event), fsm->state_to_str(fsm, curr_state));
190  break;
192  log_printf (LOGSYS_LEVEL_INFO, "Fsm:%s event \"%s\", state \"%s\" --> \"%s\"",
193  fsm->name,
194  fsm->event_to_str(fsm, fsm_event),
195  fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
196  fsm->state_to_str(fsm, next_state));
197  break;
199  log_printf (LOGSYS_LEVEL_CRIT, "Fsm:%s Can't change state from \"%s\" to \"%s\" (event was \"%s\")",
200  fsm->name,
201  fsm->state_to_str(fsm, fsm->table[fsm->curr_entry].curr_state),
202  fsm->state_to_str(fsm, next_state),
203  fsm->event_to_str(fsm, fsm_event));
205  break;
206  default:
207  log_printf (LOGSYS_LEVEL_CRIT, "Fsm: Unknown callback event!");
209  break;
210  }
211 }
212 
213 /*
214  * returns (CS_TRUE == OK, CS_FALSE == failed)
215  */
216 static int32_t wd_resource_state_is_ok (struct resource *ref)
217 {
218  char* state = NULL;
219  uint64_t last_updated;
220  uint64_t my_time;
221  uint64_t allowed_period;
222  char key_name[ICMAP_KEYNAME_MAXLEN];
223 
224  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "last_updated");
225  if (icmap_get_uint64(key_name, &last_updated) != CS_OK) {
226  /* key does not exist.
227  */
228  return CS_FALSE;
229  }
230 
231  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state");
232  if (icmap_get_string(key_name, &state) != CS_OK || strcmp(state, "disabled") == 0) {
233  /* key does not exist.
234  */
235  if (state != NULL)
236  free(state);
237 
238  return CS_FALSE;
239  }
240 
241  if (last_updated == 0) {
242  /* initial value */
243  free(state);
244  return CS_TRUE;
245  }
246 
247  my_time = cs_timestamp_get();
248 
249  /*
250  * Here we check that the monitor has written a timestamp within the poll_period
251  * plus a grace factor of (0.5 * poll_period).
252  */
253  allowed_period = (ref->check_timeout * MILLI_2_NANO_SECONDS * 3) / 2;
254  if ((last_updated + allowed_period) < my_time) {
256  "last_updated %"PRIu64" ms too late, period:%"PRIu64".",
257  (uint64_t)(my_time/MILLI_2_NANO_SECONDS - ((last_updated + allowed_period) / MILLI_2_NANO_SECONDS)),
258  ref->check_timeout);
259  free(state);
260  return CS_FALSE;
261  }
262 
263  if (strcmp (state, wd_failed_str) == 0) {
264  free(state);
265  return CS_FALSE;
266  }
267 
268  free(state);
269  return CS_TRUE;
270 }
271 
272 static void wd_config_changed (struct cs_fsm* fsm, int32_t event, void * data)
273 {
274  char *state;
275  uint64_t tmp_value;
276  uint64_t next_timeout;
277  struct resource *ref = (struct resource*)data;
278  char key_name[ICMAP_KEYNAME_MAXLEN];
279 
280  next_timeout = ref->check_timeout;
281 
282  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "poll_period");
283  if (icmap_get_uint64(ref->res_path, &tmp_value) == CS_OK) {
284  if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
286  "poll_period changing from:%"PRIu64" to %"PRIu64".",
287  ref->check_timeout, tmp_value);
288  /*
289  * To easy in the transition between poll_period's we are going
290  * to make the first timeout the bigger of the new and old value.
291  * This is to give the monitoring system time to adjust.
292  */
293  next_timeout = CS_MAX(tmp_value, ref->check_timeout);
294  ref->check_timeout = tmp_value;
295  } else {
297  "Could NOT use poll_period:%"PRIu64" ms for resource %s",
298  tmp_value, ref->name);
299  }
300  }
301 
302  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "recovery");
303  if (icmap_get_string(key_name, &ref->recovery) != CS_OK) {
304  /* key does not exist.
305  */
307  "resource %s missing a recovery key.", ref->name);
308  cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
309  return;
310  }
311  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", ref->res_path, "state");
312  if (icmap_get_string(key_name, &state) != CS_OK) {
313  /* key does not exist.
314  */
316  "resource %s missing a state key.", ref->name);
317  cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
318  return;
319  }
320  if (ref->check_timer) {
321  api->timer_delete(ref->check_timer);
322  ref->check_timer = 0;
323  }
324 
325  if (strcmp(wd_stopped_str, state) == 0) {
326  cs_fsm_state_set(&ref->fsm, WD_S_STOPPED, ref, wd_fsm_cb);
327  } else {
328  api->timer_add_duration(next_timeout * MILLI_2_NANO_SECONDS,
329  ref, wd_resource_check_fn, &ref->check_timer);
330  cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb);
331  }
332  free(state);
333 }
334 
335 static void wd_resource_failed (struct cs_fsm* fsm, int32_t event, void * data)
336 {
337  struct resource* ref = (struct resource*)data;
338 
339  if (ref->check_timer) {
340  api->timer_delete(ref->check_timer);
341  ref->check_timer = 0;
342  }
343 
344  log_printf (LOGSYS_LEVEL_CRIT, "%s resource \"%s\" failed!",
345  ref->recovery, (char*)ref->name);
346  if (strcmp (ref->recovery, "watchdog") == 0 ||
347  strcmp (ref->recovery, "quit") == 0) {
348  watchdog_ok = 0;
349  }
350  else if (strcmp (ref->recovery, "reboot") == 0) {
351  reboot(RB_AUTOBOOT);
352  }
353  else if (strcmp (ref->recovery, "shutdown") == 0) {
354  reboot(RB_POWER_OFF);
355  }
356  cs_fsm_state_set(fsm, WD_S_FAILED, data, wd_fsm_cb);
357 }
358 
359 static void wd_key_changed(
360  int32_t event,
361  const char *key_name,
362  struct icmap_notify_value new_val,
363  struct icmap_notify_value old_val,
364  void *user_data)
365 {
366  struct resource* ref = (struct resource*)user_data;
367  char *last_key_part;
368 
369  if (ref == NULL) {
370  return ;
371  }
372 
373  last_key_part = strrchr(key_name, '.');
374  if (last_key_part == NULL) {
375  return ;
376  }
377  last_key_part++;
378 
379  if (event == ICMAP_TRACK_ADD || event == ICMAP_TRACK_MODIFY) {
380  if (strcmp(last_key_part, "last_updated") == 0 ||
381  strcmp(last_key_part, "current") == 0) {
382  return;
383  }
384 
385  cs_fsm_process(&ref->fsm, WD_E_CONFIG_CHANGED, ref, wd_fsm_cb);
386  }
387 
388  if (event == ICMAP_TRACK_DELETE && ref != NULL) {
389  if (strcmp(last_key_part, "state") != 0) {
390  return ;
391  }
392 
394  "resource \"%s\" deleted from cmap!",
395  ref->name);
396 
397  api->timer_delete(ref->check_timer);
398  ref->check_timer = 0;
400 
401  free(ref);
402  }
403 }
404 
405 static void wd_resource_check_fn (void* resource_ref)
406 {
407  struct resource* ref = (struct resource*)resource_ref;
408 
409  if (wd_resource_state_is_ok (ref) == CS_FALSE) {
410  cs_fsm_process(&ref->fsm, WD_E_FAILURE, ref, wd_fsm_cb);
411  return;
412  }
414  ref, wd_resource_check_fn, &ref->check_timer);
415 }
416 
417 /*
418  * return 0 - fully configured
419  * return -1 - partially configured
420  */
421 static int32_t wd_resource_create (char *res_path, char *res_name)
422 {
423  char *state;
424  uint64_t tmp_value;
425  struct resource *ref = calloc (1, sizeof (struct resource));
426  char key_name[ICMAP_KEYNAME_MAXLEN];
427 
428  strcpy(ref->res_path, res_path);
430  ref->check_timer = 0;
431 
432  strcpy(ref->name, res_name);
433  ref->fsm.name = ref->name;
434  ref->fsm.table = wd_fsm_table;
435  ref->fsm.entries = sizeof(wd_fsm_table) / sizeof(struct cs_fsm_entry);
436  ref->fsm.curr_entry = 0;
437  ref->fsm.curr_state = WD_S_STOPPED;
438  ref->fsm.state_to_str = wd_res_state_to_str;
439  ref->fsm.event_to_str = wd_res_event_to_str;
440 
441  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "poll_period");
442  if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
443  icmap_set_uint64(key_name, ref->check_timeout);
444  } else {
445  if (tmp_value >= WD_MIN_TIMEOUT_MS && tmp_value <= WD_MAX_TIMEOUT_MS) {
446  ref->check_timeout = tmp_value;
447  } else {
449  "Could NOT use poll_period:%"PRIu64" ms for resource %s",
450  tmp_value, ref->name);
451  }
452  }
453 
454  icmap_track_add(res_path,
456  wd_key_changed,
457  ref, &ref->icmap_track);
458 
459  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "recovery");
460  if (icmap_get_string(key_name, &ref->recovery) != CS_OK) {
461  /* key does not exist.
462  */
464  "resource %s missing a recovery key.", ref->name);
465  return -1;
466  }
467  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "state");
468  if (icmap_get_string(key_name, &state) != CS_OK) {
469  /* key does not exist.
470  */
472  "resource %s missing a state key.", ref->name);
473  return -1;
474  }
475 
476  snprintf(key_name, ICMAP_KEYNAME_MAXLEN, "%s%s", res_path, "last_updated");
477  if (icmap_get_uint64(key_name, &tmp_value) != CS_OK) {
478  /* key does not exist.
479  */
480  ref->last_updated = 0;
481  } else {
482  ref->last_updated = tmp_value;
483  }
484 
485  /*
486  * delay the first check to give the monitor time to start working.
487  */
488  tmp_value = CS_MAX(ref->check_timeout * 2, WD_DEFAULT_TIMEOUT_MS);
489  api->timer_add_duration(tmp_value * MILLI_2_NANO_SECONDS,
490  ref,
491  wd_resource_check_fn, &ref->check_timer);
492 
493  cs_fsm_state_set(&ref->fsm, WD_S_RUNNING, ref, wd_fsm_cb);
494  return 0;
495 }
496 
497 
498 static void wd_tickle_fn (void* arg)
499 {
500  ENTER();
501 
502  if (watchdog_ok) {
503  if (dog > 0) {
504  ioctl(dog, WDIOC_KEEPALIVE, &watchdog_ok);
505  }
506  api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
507  wd_tickle_fn, &wd_timer);
508  }
509  else {
510  log_printf (LOGSYS_LEVEL_ALERT, "NOT tickling the watchdog!");
511  }
512 
513 }
514 
515 static void wd_resource_created_cb(
516  int32_t event,
517  const char *key_name,
518  struct icmap_notify_value new_val,
519  struct icmap_notify_value old_val,
520  void *user_data)
521 {
522  char res_name[ICMAP_KEYNAME_MAXLEN];
523  char res_type[ICMAP_KEYNAME_MAXLEN];
524  char tmp_key[ICMAP_KEYNAME_MAXLEN];
525  int res;
526 
527  if (event != ICMAP_TRACK_ADD) {
528  return ;
529  }
530 
531  res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key);
532  if (res != 3) {
533  return ;
534  }
535 
536  if (strcmp(tmp_key, "state") != 0) {
537  return ;
538  }
539 
540  snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name);
541  wd_resource_create (tmp_key, res_name);
542 }
543 
544 static void wd_scan_resources (void)
545 {
546  int res_count = 0;
547  icmap_track_t icmap_track = NULL;
548  icmap_iter_t iter;
549  const char *key_name;
550  int res;
551  char res_name[ICMAP_KEYNAME_MAXLEN];
552  char res_type[ICMAP_KEYNAME_MAXLEN];
553  char tmp_key[ICMAP_KEYNAME_MAXLEN];
554 
555  ENTER();
556 
557  iter = icmap_iter_init("resources.");
558  while ((key_name = icmap_iter_next(iter, NULL, NULL)) != NULL) {
559  res = sscanf(key_name, "resources.%[^.].%[^.].%[^.]", res_type, res_name, tmp_key);
560  if (res != 3) {
561  continue ;
562  }
563 
564  if (strcmp(tmp_key, "state") != 0) {
565  continue ;
566  }
567 
568  snprintf(tmp_key, ICMAP_KEYNAME_MAXLEN, "resources.%s.%s.", res_type, res_name);
569  if (wd_resource_create (tmp_key, res_name) == 0) {
570  res_count++;
571  }
572  }
573  icmap_iter_finalize(iter);
574 
575  icmap_track_add("resources.process.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
576  wd_resource_created_cb, NULL, &icmap_track);
577  icmap_track_add("resources.system.", ICMAP_TRACK_ADD | ICMAP_TRACK_PREFIX,
578  wd_resource_created_cb, NULL, &icmap_track);
579 
580  if (res_count == 0) {
581  log_printf (LOGSYS_LEVEL_INFO, "no resources configured.");
582  }
583 }
584 
585 
586 static void watchdog_timeout_apply (uint32_t new)
587 {
588  struct watchdog_info ident;
589  uint32_t original_timeout = 0;
590 
591  if (dog > 0) {
592  ioctl(dog, WDIOC_GETTIMEOUT, &original_timeout);
593  }
594 
595  if (new == original_timeout) {
596  return;
597  }
598 
599  watchdog_timeout = new;
600 
601  if (dog > 0) {
602  ioctl(dog, WDIOC_GETSUPPORT, &ident);
603  if (ident.options & WDIOF_SETTIMEOUT) {
604  /* yay! the dog is trained.
605  */
606  ioctl(dog, WDIOC_SETTIMEOUT, &watchdog_timeout);
607  }
608  ioctl(dog, WDIOC_GETTIMEOUT, &watchdog_timeout);
609  }
610 
611  if (watchdog_timeout == new) {
612  tickle_timeout = (watchdog_timeout * CS_TIME_MS_IN_SEC)/ 2;
613 
614  /* reset the tickle timer in case it was reduced.
615  */
616  api->timer_delete (wd_timer);
617  api->timer_add_duration(tickle_timeout*MILLI_2_NANO_SECONDS, NULL,
618  wd_tickle_fn, &wd_timer);
619 
620  log_printf (LOGSYS_LEVEL_DEBUG, "The Watchdog timeout is %d seconds", watchdog_timeout);
621  log_printf (LOGSYS_LEVEL_DEBUG, "The tickle timeout is %"PRIu64" ms", tickle_timeout);
622  } else {
624  "Could not change the Watchdog timeout from %d to %d seconds",
625  original_timeout, new);
626  }
627 
628 }
629 
630 static int setup_watchdog(void)
631 {
632  struct watchdog_info ident;
633  char *str;
634 
635  ENTER();
636 
637  if (icmap_get_string("resources.watchdog_device", &str) == CS_OK) {
638  if (strcmp (str, "off") == 0) {
639  log_printf (LOGSYS_LEVEL_WARNING, "Watchdog disabled by configuration");
640  free(str);
641  dog = -1;
642  return -1;
643  } else {
644  watchdog_device = str;
645  }
646  }
647 
648  if (access (watchdog_device, W_OK) != 0) {
649  log_printf (LOGSYS_LEVEL_WARNING, "No Watchdog %s, try modprobe <a watchdog>", watchdog_device);
650  dog = -1;
651  return -1;
652  }
653 
654  /* here goes, lets hope they have "Magic Close"
655  */
656  dog = open(watchdog_device, O_WRONLY);
657 
658  if (dog == -1) {
659  log_printf (LOGSYS_LEVEL_WARNING, "Watchdog %s exists but couldn't be opened.", watchdog_device);
660  dog = -1;
661  return -1;
662  }
663 
664  /* Right we have the dog.
665  * Lets see what breed it is.
666  */
667 
668  ioctl(dog, WDIOC_GETSUPPORT, &ident);
669  log_printf (LOGSYS_LEVEL_INFO, "Watchdog %s is now been tickled by corosync.", watchdog_device);
670  log_printf (LOGSYS_LEVEL_DEBUG, "%s", ident.identity);
671 
672  watchdog_timeout_apply (watchdog_timeout);
673 
674  ioctl(dog, WDIOC_SETOPTIONS, WDIOS_ENABLECARD);
675 
676  return 0;
677 }
678 
679 static void wd_top_level_key_changed(
680  int32_t event,
681  const char *key_name,
682  struct icmap_notify_value new_val,
683  struct icmap_notify_value old_val,
684  void *user_data)
685 {
686  uint32_t tmp_value_32;
687 
688  ENTER();
689 
690  if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) == CS_OK) {
691  if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
692  watchdog_timeout_apply (tmp_value_32);
693  return;
694  }
695  }
696 
698  "Set watchdog_timeout is out of range (2..120).");
699  icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout);
700 }
701 
702 static void watchdog_timeout_get_initial (void)
703 {
704  uint32_t tmp_value_32;
705  icmap_track_t icmap_track = NULL;
706 
707  ENTER();
708 
709  if (icmap_get_uint32("resources.watchdog_timeout", &tmp_value_32) != CS_OK) {
710  watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
711 
712  icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout);
713  }
714  else {
715  if (tmp_value_32 >= 2 && tmp_value_32 <= 120) {
716  watchdog_timeout_apply (tmp_value_32);
717  }
718  else {
720  "Set watchdog_timeout is out of range (2..120).");
722  "use default value %d seconds.", WD_DEFAULT_TIMEOUT_SEC);
723  watchdog_timeout_apply (WD_DEFAULT_TIMEOUT_SEC);
724  icmap_set_uint32("resources.watchdog_timeout", watchdog_timeout);
725  }
726  }
727 
728  icmap_track_add("resources.watchdog_timeout", ICMAP_TRACK_MODIFY,
729  wd_top_level_key_changed, NULL, &icmap_track);
730 
731 }
732 
733 static char *wd_exec_init_fn (struct corosync_api_v1 *corosync_api)
734 {
735 
736  ENTER();
737 
738  api = corosync_api;
739 
740  watchdog_timeout_get_initial();
741 
742  setup_watchdog();
743 
744  wd_scan_resources();
745 
746  return NULL;
747 }
748 
749 static int wd_exec_exit_fn (void)
750 {
751  char magic = 'V';
752  ENTER();
753 
754  if (dog > 0) {
755  log_printf (LOGSYS_LEVEL_INFO, "magically closing the watchdog.");
756  write (dog, &magic, 1);
757  }
758  return 0;
759 }
760 
761 
#define CS_TRUE
Definition: corotypes.h:54
const char * name
Definition: coroapi.h:492
const char * wd_running_str
Definition: wd.c:131
const char * icmap_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type)
Return next item in iterator iter.
Definition: icmap.c:1103
struct cs_fsm fsm
Definition: wd.c:66
#define LOGSYS_LEVEL_INFO
Definition: logsys.h:73
const char * wd_config_changed_str
Definition: wd.c:135
int32_t curr_state
Definition: fsm.h:67
#define CS_FALSE
Definition: corotypes.h:53
void(* timer_delete)(corosync_timer_handle_t timer_handle)
Definition: coroapi.h:241
const char * wd_failed_str
Definition: wd.c:132
The corosync_service_engine struct.
Definition: coroapi.h:491
void icmap_iter_finalize(icmap_iter_t iter)
Finalize iterator.
Definition: icmap.c:1124
int(* timer_add_duration)(unsigned long long nanoseconds_in_future, void *data, void(*timer_nf)(void *data), corosync_timer_handle_t *handle)
Definition: coroapi.h:229
#define corosync_exit_error(err)
Definition: exec/util.h:70
Definition: fsm.h:65
#define WD_DEFAULT_TIMEOUT_SEC
Definition: wd.c:83
cs_fsm_event_to_str_fn event_to_str
Definition: fsm.h:72
time_t last_updated
Definition: wd.c:65
#define log_printf(level, format, args...)
Definition: logsys.h:319
corosync_timer_handle_t check_timer
Definition: wd.c:68
Definition: wd.c:61
#define ICMAP_TRACK_DELETE
Definition: icmap.h:77
#define ICMAP_KEYNAME_MAXLEN
Maximum length of key in icmap.
Definition: icmap.h:48
const char * wd_failure_str
Definition: wd.c:133
char res_path[ICMAP_KEYNAME_MAXLEN]
Definition: wd.c:62
size_t entries
Definition: fsm.h:69
#define LOGSYS_LEVEL_WARNING
Definition: logsys.h:71
#define ICMAP_TRACK_MODIFY
Definition: icmap.h:78
struct corosync_service_engine * wd_get_service_engine_ver0(void)
Definition: wd.c:146
cs_error_t icmap_set_uint32(const char *key_name, uint32_t value)
Definition: icmap.c:611
void * user_data
Definition: sam.c:127
icmap_track_t icmap_track
Definition: wd.c:70
#define ICMAP_TRACK_ADD
Definition: icmap.h:76
struct corosync_service_engine wd_service_engine
Definition: wd.c:94
const char * name
Definition: fsm.h:66
#define LOGSYS_LEVEL_ERROR
Definition: logsys.h:70
Linked list API.
#define DECLARE_LIST_INIT(name)
Definition: list.h:51
wd_resource_event
Definition: wd.c:126
cs_error_t icmap_track_delete(icmap_track_t icmap_track)
Remove previously added track.
Definition: icmap.c:1212
#define LOGSYS_LEVEL_DEBUG
Definition: logsys.h:74
char name[CS_MAX_NAME_LENGTH]
Definition: wd.c:64
The corosync_api_v1 struct.
Definition: coroapi.h:225
char * recovery
Definition: wd.c:63
#define CS_FSM_CB_EVENT_STATE_SET
Definition: fsm.h:55
cs_error_t icmap_get_uint32(const char *key_name, uint32_t *u32)
Definition: icmap.c:866
struct cs_fsm_entry wd_fsm_table[]
Definition: wd.c:137
#define WD_MAX_TIMEOUT_MS
Definition: wd.c:86
cs_error_t icmap_set_uint64(const char *key_name, uint64_t value)
Definition: icmap.c:623
#define ENTER
Definition: logsys.h:320
cs_fsm_state_to_str_fn state_to_str
Definition: fsm.h:71
#define MILLI_2_NANO_SECONDS
Definition: coroapi.h:105
Definition: fsm.h:58
#define CS_FSM_CB_EVENT_PROCESS_NF
Definition: fsm.h:54
cs_error_t icmap_get_uint64(const char *key_name, uint64_t *u64)
Definition: icmap.c:878
#define WD_DEFAULT_TIMEOUT_MS
Definition: wd.c:84
#define CS_FSM_CB_EVENT_STATE_SET_NF
Definition: fsm.h:56
#define CS_MAX(x, y)
Definition: corotypes.h:57
uint64_t check_timeout
Definition: wd.c:69
#define CS_MAX_NAME_LENGTH
Definition: corotypes.h:55
#define WD_MIN_TIMEOUT_MS
Definition: wd.c:85
qb_loop_timer_handle corosync_timer_handle_t
corosync_timer_handle_t
Definition: coroapi.h:74
wd_resource_state_t
Definition: wd.c:54
cs_error_t icmap_get_string(const char *key_name, char **str)
Shortcut for icmap_get for string type.
Definition: icmap.c:896
#define LOGSYS_LEVEL_CRIT
Definition: logsys.h:69
int32_t curr_entry
Definition: fsm.h:68
#define LOGSYS_LEVEL_ALERT
Definition: logsys.h:68
#define CS_TIME_MS_IN_SEC
Definition: corotypes.h:129
int32_t curr_state
Definition: fsm.h:59
struct cs_fsm_entry * table
Definition: fsm.h:70
icmap_iter_t icmap_iter_init(const char *prefix)
Initialize iterator with given prefix.
Definition: icmap.c:1097
wd_resource_state
Definition: wd.c:120
qb_map_iter_t * icmap_iter_t
Itterator type.
Definition: icmap.h:123
Structure passed as new_value and old_value in change callback.
Definition: icmap.h:91
const char * wd_stopped_str
Definition: wd.c:134
cs_error_t icmap_track_add(const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track)
Add tracking function for given key_name.
Definition: icmap.c:1167
LOGSYS_DECLARE_SUBSYS("WD")
#define ICMAP_TRACK_PREFIX
Whole prefix is tracked, instead of key only (so "totem." tracking means that "totem.nodeid", "totem.version", ...
Definition: icmap.h:85