38 #include <sys/types.h>
44 #include <qb/qbipc_common.h>
/*
 * Quorum device (qdevice) state.
 *
 * qdevice_can_operate: changed through update_qdevice_can_operate(), which
 *   also publishes it as "runtime.votequorum.qdevice_can_operate". The
 *   configuration checks clear it on qdevice-related configuration errors,
 *   and the qdevice register/poll/master_wins handlers test it.
 * qdevice_reg_conn: connection recorded by the qdevice_register library
 *   handler while a registration is pending; a non-NULL value makes a
 *   further request report "Registration request already in progress".
 * qdevice_master_wins: changed through update_qdevice_master_wins(), which
 *   also publishes it as "runtime.votequorum.qdevice_master_wins".
 */
75 static uint8_t qdevice_can_operate = 1;
76 static void *qdevice_reg_conn = NULL;
77 static uint8_t qdevice_master_wins = 0;
/*
 * two_node: special-cased by calculate_quorum() when total_nodes <= 2;
 *   rejected by the configuration checks when a quorum device is configured.
 */
79 static uint8_t two_node = 0;
/*
 * wait_for_all: feature switch; wait_for_all_status: current state, changed
 *   through update_wait_for_all_status(). are_we_quorate() tests both
 *   together before logging "Waiting for all cluster members".
 * wait_for_all_autoset: when set, votequorum_readconfig() derives
 *   wait_for_all from two_node.
 */
81 static uint8_t wait_for_all = 0;
82 static uint8_t wait_for_all_status = 0;
83 static uint8_t wait_for_all_autoset = 0;
/*
 * Node id bounds maintained by get_lowest_node_id() / get_highest_node_id();
 * they start at -1 and are compared against node ids by
 * check_low_node_id_partition() / check_high_node_id_partition().
 */
86 static int lowest_node_id = -1;
87 static int highest_node_id = -1;
/*
 * NOTE(review): presumably the default for last_man_standing_window, which is
 * multiplied by 1000000 before being handed to timer_add_duration() -- confirm
 * the unit against the timer API.
 */
89 #define DEFAULT_LMS_WIN 10000
/* Read from "quorum.last_man_standing" in votequorum_readconfig(). */
90 static uint8_t last_man_standing = 0;
/*
 * allow_downscale: when set (together with allow_decrease and a non-zero
 *   max_expected), calculate_quorum() raises max_expected to at least
 *   ev_barrier.
 */
93 static uint8_t allow_downscale = 0;
94 static uint32_t ev_barrier = 0;
/*
 * Expected-votes tracking.
 *
 * ev_tracking: read from "quorum.expected_votes_tracking".
 * ev_tracking_barrier: value read from / written to the "<run dir>/ev_tracking"
 *   file and published as "runtime.votequorum.ev_tracking_barrier".
 * ev_tracking_fd: descriptor returned by open() for that file; starts at -1.
 */
96 static uint8_t ev_tracking = 0;
97 static uint32_t ev_tracking_barrier = 0;
98 static int ev_tracking_fd = -1;
113 struct qb_ipc_request_header header __attribute__((aligned(8)));
123 struct qb_ipc_request_header header __attribute__((aligned(8)));
129 struct qb_ipc_request_header header __attribute__((aligned(8)));
/*
 * Identifiers of the executive message types used by this service.
 * NOTE(review): the values presumably index the exec handler table, whose
 * visible entries (nodeinfo, reconfigure, qdevice_reg, qdevice_reconfigure)
 * appear in the same order -- confirm before renumbering.
 */
144 #define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO 0
145 #define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE 1
146 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG 2
147 #define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE 3
/*
 * Forward declarations of the notification senders.
 *
 * The quorum and nodelist senders take a connection and a context value;
 * this file calls them with (NULL, 0) from the quorum recalculation and sync
 * paths, and with a specific client connection and its context from the
 * trackstart handler.
 */
149 static void votequorum_exec_send_expectedvotes_notification(
void);
150 static int votequorum_exec_send_quorum_notification(
void *conn, uint64_t context);
151 static int votequorum_exec_send_nodelist_notification(
void *conn, uint64_t context);
/*
 * Values of the `param` argument of votequorum_exec_send_reconfigure().
 * message_handler_req_exec_votequorum_reconfigure() switches on the `param`
 * field of the received message.
 */
153 #define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES 1
154 #define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES 2
155 #define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA 3
/*
 * Sends a reconfigure request: `param` is one of the
 * VOTEQUORUM_RECONFIG_PARAM_* values above, `nodeid` and `value` are the
 * node and the new setting it applies to.
 */
157 static int votequorum_exec_send_reconfigure(uint8_t
param,
unsigned int nodeid, uint32_t
value);
/*
 * Values of the `operation` argument of votequorum_exec_send_qdevice_reg();
 * message_handler_req_exec_votequorum_qdevice_reg() switches on the
 * `operation` field of the received message.
 */
162 #define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER 0
163 #define VOTEQUORUM_QDEVICE_OPERATION_REGISTER 1
/*
 * Bit flags kept in a node's `flags` field (tested and cleared on `us` and on
 * other nodes throughout this file). Each value is a distinct power of two so
 * the flags can be combined; decode_flags() logs a `flags` value in readable
 * form.
 */
169 #define NODE_FLAGS_QUORATE 1
170 #define NODE_FLAGS_LEAVING 2
171 #define NODE_FLAGS_WFASTATUS 4
172 #define NODE_FLAGS_FIRST 8
173 #define NODE_FLAGS_QDEVICE_REGISTERED 16
174 #define NODE_FLAGS_QDEVICE_ALIVE 32
175 #define NODE_FLAGS_QDEVICE_CAST_VOTE 64
176 #define NODE_FLAGS_QDEVICE_MASTER_WINS 128
/*
 * quorum: result of calculate_quorum(), stored by recalculate_quorum();
 *   are_we_quorate() compares it against the total votes.
 * cluster_is_quorate: current quorate state; passed to quorum_callback() and
 *   copied into quorum notifications.
 */
197 static uint8_t quorum;
198 static uint8_t cluster_is_quorate;
/* Head of the node list; initialised with list_init(), filled by node_add_ordered(). */
205 static struct list_head cluster_members_list;
/*
 * Entry counts for the arrays used in this file:
 *   quorum_members_entries          - entries in quorum_members (set from the
 *                                     membership list in votequorum_sync_init())
 *   previous_quorum_members_entries - entries in previous_quorum_members (the
 *                                     copy taken before quorum_members is replaced)
 *   atb_nodelist_entries            - node ids parsed into atb_nodelist by
 *                                     parse_atb_string()
 */
209 static int quorum_members_entries = 0;
210 static int previous_quorum_members_entries = 0;
211 static int atb_nodelist_entries = 0;
/* Index of the next slot handed out from the cluster_nodes array. */
218 static int cluster_nodes_entries = 0;
/* Set to 1 after the qdevice timer is added; cleared when it fires or is cancelled. */
238 static int qdevice_timer_set = 0;
/* Set to 1 after the last_man_standing timer is added; cleared when it fires. */
240 static int last_man_standing_timer_set = 0;
/*
 * Sync bookkeeping:
 *   sync_nodeinfo_sent            - reset in votequorum_sync_init(), set by
 *                                   votequorum_sync_process() once nodeinfo was sent
 *   sync_wait_for_poll_or_timeout - set in votequorum_sync_init() when the qdevice
 *                                   is registered and alive; cleared by a qdevice
 *                                   poll, the qdevice timer, or unregistration
 */
241 static int sync_nodeinfo_sent = 0;
242 static int sync_wait_for_poll_or_timeout = 0;
/*
 * 1 between votequorum_sync_init() and votequorum_sync_activate(); while set,
 * are_we_quorate() does not call quorum_callback() on a quorum change.
 */
248 static int sync_in_progress = 0;
250 static void votequorum_sync_init (
251 const unsigned int *trans_list,
252 size_t trans_list_entries,
253 const unsigned int *member_list,
254 size_t member_list_entries,
257 static int votequorum_sync_process (
void);
258 static void votequorum_sync_activate (
void);
259 static void votequorum_sync_abort (
void);
268 static int votequorum_exec_exit_fn (
void);
269 static int votequorum_exec_send_nodeinfo(uint32_t
nodeid);
271 static void message_handler_req_exec_votequorum_nodeinfo (
274 static void exec_votequorum_nodeinfo_endian_convert (
void *message);
276 static void message_handler_req_exec_votequorum_reconfigure (
279 static void exec_votequorum_reconfigure_endian_convert (
void *message);
281 static void message_handler_req_exec_votequorum_qdevice_reg (
284 static void exec_votequorum_qdevice_reg_endian_convert (
void *message);
286 static void message_handler_req_exec_votequorum_qdevice_reconfigure (
289 static void exec_votequorum_qdevice_reconfigure_endian_convert (
void *message);
295 .exec_endian_convert_fn = exec_votequorum_nodeinfo_endian_convert
298 .exec_handler_fn = message_handler_req_exec_votequorum_reconfigure,
299 .exec_endian_convert_fn = exec_votequorum_reconfigure_endian_convert
302 .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reg,
303 .exec_endian_convert_fn = exec_votequorum_qdevice_reg_endian_convert
306 .exec_handler_fn = message_handler_req_exec_votequorum_qdevice_reconfigure,
307 .exec_endian_convert_fn = exec_votequorum_qdevice_reconfigure_endian_convert
/*
 * Library (client connection) side entry points.
 *
 * quorum_lib_init_fn / quorum_lib_exit_fn are installed as .lib_init_fn and
 * .lib_exit_fn of the service engine; each receives the client connection.
 */
315 static int quorum_lib_init_fn (
void *conn);
317 static int quorum_lib_exit_fn (
void *conn);
/* Timer callback passed to corosync_api->timer_add_duration() for the qdevice timer. */
319 static void qdevice_timer_fn(
void *arg);
/*
 * Request handlers, one per library request. Each takes the client connection
 * and a pointer to the request message; most are visible as .lib_handler_fn
 * entries in the library handler table.
 */
321 static void message_handler_req_lib_votequorum_getinfo (
void *conn,
322 const void *message);
324 static void message_handler_req_lib_votequorum_setexpected (
void *conn,
325 const void *message);
327 static void message_handler_req_lib_votequorum_setvotes (
void *conn,
328 const void *message);
330 static void message_handler_req_lib_votequorum_trackstart (
void *conn,
331 const void *message);
333 static void message_handler_req_lib_votequorum_trackstop (
void *conn,
334 const void *message);
/* Quorum device requests: register, unregister, rename (update), poll and master_wins. */
336 static void message_handler_req_lib_votequorum_qdevice_register (
void *conn,
337 const void *message);
339 static void message_handler_req_lib_votequorum_qdevice_unregister (
void *conn,
340 const void *message);
342 static void message_handler_req_lib_votequorum_qdevice_update (
void *conn,
343 const void *message);
345 static void message_handler_req_lib_votequorum_qdevice_poll (
void *conn,
346 const void *message);
348 static void message_handler_req_lib_votequorum_qdevice_master_wins (
void *conn,
349 const void *message);
358 .lib_handler_fn = message_handler_req_lib_votequorum_setexpected,
362 .lib_handler_fn = message_handler_req_lib_votequorum_setvotes,
366 .lib_handler_fn = message_handler_req_lib_votequorum_trackstart,
370 .lib_handler_fn = message_handler_req_lib_votequorum_trackstop,
374 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_register,
378 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_unregister,
382 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_update,
386 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_poll,
390 .lib_handler_fn = message_handler_req_lib_votequorum_qdevice_master_wins,
396 .
name =
"corosync vote quorum service v1.0",
399 .private_data_size =
sizeof (
struct quorum_pd),
402 .lib_init_fn = quorum_lib_init_fn,
403 .lib_exit_fn = quorum_lib_exit_fn,
404 .lib_engine = quorum_lib_service,
406 .exec_init_fn = votequorum_exec_init_fn,
407 .exec_exit_fn = votequorum_exec_exit_fn,
408 .exec_engine = votequorum_exec_engine,
410 .sync_init = votequorum_sync_init,
411 .sync_process = votequorum_sync_process,
412 .sync_activate = votequorum_sync_activate,
413 .sync_abort = votequorum_sync_abort
418 return (&votequorum_service_engine);
423 .
name =
"corosync_votequorum",
/*
 * max(a, b): evaluates to the larger of the two arguments.
 *
 * This is a plain function-like macro, so whichever argument is selected is
 * evaluated twice. Do not pass expressions with side effects.
 */
#define max(a,b) (((a) > (b)) ? (a) : (b))

/*
 * list_iterate(v, head): loop header that walks a circular list.
 *
 * `v` is assigned (head)->next and then follows ->next until it is equal to
 * `head` again, so the head element itself is never visited. Both arguments
 * are fully parenthesised so that expressions such as `cond ? &a : &b` can be
 * passed as `head` without being re-associated with the `!=` comparison.
 * `head` is evaluated on every iteration and must be free of side effects.
 */
#define list_iterate(v, head) \
	for ((v) = (head)->next; (v) != (head); (v) = (v)->next)
438 static void node_add_ordered(
struct cluster_node *newnode)
454 list_add(&newnode->
list, &cluster_members_list);
473 cl = (
struct cluster_node *)&cluster_nodes[cluster_nodes_entries];
474 cluster_nodes_entries++;
495 node_add_ordered(cl);
504 static struct cluster_node *find_node_by_nodeid(
unsigned int nodeid)
533 static void get_lowest_node_id(
void)
545 (node->
node_id < lowest_node_id)) {
546 lowest_node_id = node->
node_id;
555 static void get_highest_node_id(
void)
567 (node->
node_id > highest_node_id)) {
568 highest_node_id = node->
node_id;
577 static int check_low_node_id_partition(
void)
588 (node->
node_id == lowest_node_id)) {
597 static int check_high_node_id_partition(
void)
608 (node->
node_id == highest_node_id)) {
617 static int is_in_nodelist(
int nodeid,
unsigned int *members,
int entries)
622 for (i=0; i<entries; i++) {
623 if (nodeid == members[i]) {
645 static int check_auto_tie_breaker(
void)
652 res = check_low_node_id_partition();
658 res = check_high_node_id_partition();
665 for (i=0; i < atb_nodelist_entries; i++) {
666 if (is_in_nodelist(atb_nodelist[i], quorum_members, quorum_members_entries)) {
673 for (j=0; j<i; j++) {
674 if (is_in_nodelist(atb_nodelist[j], previous_quorum_members, previous_quorum_members_entries)) {
701 static void parse_atb_string(
char *atb_string)
709 if (!strcmp(atb_string,
"lowest"))
712 if (!strcmp(atb_string,
"highest"))
715 if (atoi(atb_string)) {
717 atb_nodelist_entries = 0;
720 num = strtol(ptr, &ptr, 10);
723 atb_nodelist[atb_nodelist_entries++] = num;
727 if (atb_nodelist_entries) {
736 log_printf(
LOGSYS_LEVEL_WARNING,
"auto_tie_breaker_nodes is not valid. It must be 'lowest', 'highest' or a space-separated list of node IDs. auto_tie_breaker is disabled");
742 static int check_qdevice_master(
void)
763 static void decode_flags(uint32_t
flags)
768 "flags: quorate: %s Leaving: %s WFA Status: %s First: %s Qdevice: %s QdeviceAlive: %s QdeviceCastVote: %s QdeviceMasterWins: %s",
784 static int load_ev_tracking_barrier(
void)
787 char filename[PATH_MAX];
791 snprintf(filename,
sizeof(filename) - 1,
"%s/ev_tracking",
get_run_dir());
793 ev_tracking_fd = open(filename, O_RDWR, 0700);
794 if (ev_tracking_fd != -1) {
795 res = read (ev_tracking_fd, &ev_tracking_barrier,
sizeof(uint32_t));
796 close(ev_tracking_fd);
797 if (res ==
sizeof (uint32_t)) {
803 ev_tracking_barrier = 0;
805 ev_tracking_fd = open (filename, O_CREAT|O_RDWR, 0700);
806 if (ev_tracking_fd != -1) {
807 res = write (ev_tracking_fd, &ev_tracking_barrier,
sizeof (uint32_t));
808 if ((res == -1) || (res !=
sizeof (uint32_t))) {
810 "Unable to write to %s", filename);
812 close(ev_tracking_fd);
817 "Unable to create %s file", filename);
824 static void update_wait_for_all_status(uint8_t wfa_status)
828 wait_for_all_status = wfa_status;
829 if (wait_for_all_status) {
832 us->
flags &= ~NODE_FLAGS_WFASTATUS;
835 wait_for_all_status);
840 static void update_two_node(
void)
859 static void update_qdevice_can_operate(uint8_t status)
863 qdevice_can_operate = status;
864 icmap_set_uint8(
"runtime.votequorum.qdevice_can_operate", qdevice_can_operate);
869 static void update_qdevice_master_wins(uint8_t allow)
873 qdevice_master_wins = allow;
874 icmap_set_uint8(
"runtime.votequorum.qdevice_master_wins", qdevice_master_wins);
879 static void update_ev_tracking_barrier(uint32_t ev_t_barrier)
885 ev_tracking_barrier = ev_t_barrier;
886 icmap_set_uint32(
"runtime.votequorum.ev_tracking_barrier", ev_tracking_barrier);
888 if (lseek (ev_tracking_fd, 0, SEEK_SET) != 0) {
890 "Unable to update ev_tracking_barrier on disk data!!!");
895 res = write (ev_tracking_fd, &ev_tracking_barrier,
sizeof (uint32_t));
896 if (res !=
sizeof (uint32_t)) {
898 "Unable to update ev_tracking_barrier on disk data!!!");
900 #ifdef HAVE_FDATASYNC
901 fdatasync(ev_tracking_fd);
903 fsync(ev_tracking_fd);
913 static int calculate_quorum(
int allow_decrease,
unsigned int max_expected,
unsigned int *ret_total_votes)
917 unsigned int total_votes = 0;
918 unsigned int highest_expected = 0;
919 unsigned int newquorum, q1, q2;
920 unsigned int total_nodes = 0;
924 if ((allow_downscale) && (allow_decrease) && (max_expected)) {
925 max_expected =
max(ev_barrier, max_expected);
936 total_votes += node->
votes;
943 total_votes += qdevice->
votes;
947 if (max_expected > 0) {
948 highest_expected = max_expected;
955 q1 = (highest_expected + 2) / 2;
956 q2 = (total_votes + 2) / 2;
957 newquorum =
max(q1, q2);
963 if (!allow_decrease) {
964 newquorum =
max(quorum, newquorum);
974 if (two_node && total_nodes <= 2) {
978 if (ret_total_votes) {
979 *ret_total_votes = total_votes;
986 static void update_node_expected_votes(
int new_expected_votes)
991 if (new_expected_votes) {
1002 static void are_we_quorate(
unsigned int total_votes)
1005 int quorum_change = 0;
1013 if ((wait_for_all) && (wait_for_all_status)) {
1016 "Waiting for all cluster members. "
1017 "Current votes: %d expected_votes: %d",
1019 assert(!cluster_is_quorate);
1022 update_wait_for_all_status(0);
1025 if (quorum > total_votes) {
1029 get_lowest_node_id();
1030 get_highest_node_id();
1033 if ((auto_tie_breaker !=
ATB_NONE) &&
1037 (previous_quorum_members_entries - quorum_members_entries < quorum) &&
1038 (check_auto_tie_breaker() == 1)) {
1042 if ((qdevice_master_wins) &&
1044 (check_qdevice_master() == 1)) {
1049 if (cluster_is_quorate && !quorate) {
1053 if (!cluster_is_quorate && quorate) {
1059 if (cluster_is_quorate) {
1062 us->
flags &= ~NODE_FLAGS_QUORATE;
1067 update_wait_for_all_status(0);
1069 update_wait_for_all_status(1);
1073 if ((quorum_change) &&
1074 (sync_in_progress == 0)) {
1075 quorum_callback(quorum_members, quorum_members_entries,
1076 cluster_is_quorate, &quorum_ringid);
1077 votequorum_exec_send_quorum_notification(NULL, 0L);
1083 static void get_total_votes(
unsigned int *totalvotes,
unsigned int *current_members)
1085 unsigned int total_votes = 0;
1086 unsigned int cluster_members = 0;
1096 total_votes += node->
votes;
1100 if (qdevice->
votes) {
1101 total_votes += qdevice->
votes;
1105 *totalvotes = total_votes;
1106 *current_members = cluster_members;
1114 static void recalculate_quorum(
int allow_decrease,
int by_current_nodes)
1116 unsigned int total_votes = 0;
1117 unsigned int cluster_members = 0;
1121 get_total_votes(&total_votes, &cluster_members);
1123 if (!by_current_nodes) {
1124 cluster_members = 0;
1133 votequorum_exec_send_expectedvotes_notification();
1136 if ((ev_tracking) &&
1141 quorum = calculate_quorum(allow_decrease, cluster_members, &total_votes);
1142 update_node_expected_votes(cluster_members);
1144 are_we_quorate(total_votes);
1153 static int votequorum_read_nodelist_configuration(uint32_t *
votes,
1158 const char *iter_key;
1160 uint32_t our_pos, node_pos;
1161 uint32_t nodecount = 0;
1162 uint32_t nodelist_expected_votes = 0;
1163 uint32_t node_votes = 0;
1170 "No nodelist defined or our node is not in the nodelist");
1178 res = sscanf(iter_key,
"nodelist.node.%u.%s", &node_pos, tmp_key);
1183 if (strcmp(tmp_key,
"ring0_addr") != 0) {
1194 nodelist_expected_votes = nodelist_expected_votes + node_votes;
1196 if (node_pos == our_pos) {
1197 *votes = node_votes;
1201 *expected_votes = nodelist_expected_votes;
1211 static int votequorum_qdevice_is_configured(uint32_t *qdevice_votes)
1213 char *qdevice_model = NULL;
1219 if (strlen(qdevice_model)) {
1221 *qdevice_votes = -1;
1229 update_qdevice_can_operate(1);
1233 free(qdevice_model);
/*
 * NOTE(review): presumably the values accepted by the `runtime` argument of
 * votequorum_readconfig(), distinguishing the initial configuration read from
 * a later re-read -- confirm against the callers.
 */
1241 #define VOTEQUORUM_READCONFIG_STARTUP 0
1242 #define VOTEQUORUM_READCONFIG_RUNTIME 1
1244 static char *votequorum_readconfig(
int runtime)
1246 uint32_t node_votes = 0, qdevice_votes = 0;
1247 uint32_t node_expected_votes = 0, expected_votes = 0;
1248 uint32_t node_count = 0;
1250 int have_nodelist, have_qdevice;
1251 char *atb_string = NULL;
1269 auto_tie_breaker = initial_auto_tie_breaker;
1277 have_nodelist = votequorum_read_nodelist_configuration(&node_votes, &node_count, &node_expected_votes);
1278 have_qdevice = votequorum_qdevice_is_configured(&qdevice_votes);
1287 error = (
char *)
"configuration error: nodelist or quorum.expected_votes must be configured!";
1300 if ((two_node) && (have_qdevice)) {
1302 error = (
char *)
"configuration error: two_node and quorum device cannot be configured at the same time!";
1306 if (us->
flags & NODE_FLAGS_QDEVICE_REGISTERED) {
1311 update_qdevice_can_operate(0);
1322 wait_for_all_autoset = 1;
1324 (void)
icmap_get_uint8(
"quorum.last_man_standing", &last_man_standing);
1325 (void)
icmap_get_uint32(
"quorum.last_man_standing_window", &last_man_standing_window);
1326 (void)
icmap_get_uint8(
"quorum.expected_votes_tracking", &ev_tracking);
1339 "auto_tie_breaker_node: is meaningless if auto_tie_breaker is set to 0");
1343 if (atb && atb_string) {
1344 parse_atb_string(atb_string);
1347 initial_auto_tie_breaker = auto_tie_breaker;
1350 if (allow_downscale) {
1355 if (load_ev_tracking_barrier() < 0) {
1357 return ((
char *)
"Unable to load ev_tracking file!");
1359 update_ev_tracking_barrier(ev_tracking_barrier);
1369 if (wait_for_all_autoset) {
1370 wait_for_all = two_node;
1376 if (two_node && auto_tie_breaker !=
ATB_NONE) {
1386 if ((auto_tie_breaker !=
ATB_NONE) && (node_expected_votes % 2) &&
1388 if (last_man_standing) {
1397 error = (
char *)
"configuration error: auto_tie_breaker & last_man_standing not available in odd sized cluster";
1416 if ((have_qdevice) && (last_man_standing)) {
1418 error = (
char *)
"configuration error: quorum.device is not compatible with last_man_standing";
1423 update_qdevice_can_operate(0);
1427 if ((have_qdevice) && (auto_tie_breaker !=
ATB_NONE)) {
1429 error = (
char *)
"configuration error: quorum.device is not compatible with auto_tie_breaker";
1434 update_qdevice_can_operate(0);
1438 if ((have_qdevice) && (allow_downscale)) {
1440 error = (
char *)
"configuration error: quorum.device is not compatible with allow_downscale";
1445 update_qdevice_can_operate(0);
1454 if ((expected_votes) && (have_qdevice) && (qdevice_votes == -1)) {
1456 error = (
char *)
"configuration error: quorum.device.votes must be specified when quorum.expected_votes is set";
1461 update_qdevice_can_operate(0);
1470 if ((have_qdevice) &&
1471 (qdevice_votes == -1) &&
1473 (node_count != node_expected_votes)) {
1475 error = (
char *)
"configuration error: quorum.device.votes must be specified when not all nodes votes 1";
1480 update_qdevice_can_operate(0);
1488 if ((qdevice_votes > 0) && (expected_votes)) {
1489 int delta = expected_votes - qdevice_votes;
1492 error = (
char *)
"configuration error: quorum.device.votes is too high or expected_votes is too low";
1497 update_qdevice_can_operate(0);
1506 if ((have_qdevice) &&
1507 (qdevice_votes == -1) &&
1508 (!expected_votes) &&
1510 (node_count == node_expected_votes)) {
1511 qdevice_votes = node_expected_votes - 1;
1512 node_expected_votes = node_expected_votes + qdevice_votes;
1518 log_printf(
LOGSYS_LEVEL_DEBUG,
"ev_tracking=%d, ev_tracking_barrier = %d: expected_votes = %d\n", ev_tracking, ev_tracking_barrier, expected_votes);
1521 expected_votes = ev_tracking_barrier;
1524 if (have_nodelist) {
1525 us->
votes = node_votes;
1532 if (expected_votes) {
1540 if (!have_qdevice) {
1544 if (qdevice_votes != -1) {
1545 qdevice->
votes = qdevice_votes;
1552 update_wait_for_all_status(1);
1554 }
else if (wait_for_all_autoset && wait_for_all_status) {
1559 update_wait_for_all_status(0);
1567 static void votequorum_refresh_config(
1569 const char *key_name,
1574 int old_votes, old_expected_votes;
1584 if (
icmap_get_uint8(
"config.totemconfig_reload_in_progress", &reloading) ==
CS_OK && reloading) {
1589 if (strcmp(key_name,
"quorum.cancel_wait_for_all") == 0 &&
1597 old_votes = us->
votes;
1608 votequorum_exec_send_nodeinfo(us->
node_id);
1610 if (us->
votes != old_votes) {
1622 static void votequorum_exec_add_config_notification(
void)
1632 votequorum_refresh_config,
1634 &icmap_track_nodelist);
1638 votequorum_refresh_config,
1640 &icmap_track_quorum);
1644 votequorum_refresh_config,
1646 &icmap_track_reload);
1655 static int votequorum_exec_send_reconfigure(uint8_t
param,
unsigned int nodeid, uint32_t
value)
1658 struct iovec iov[1];
1682 static int votequorum_exec_send_nodeinfo(uint32_t nodeid)
1685 struct iovec iov[1];
1691 node = find_node_by_nodeid(nodeid);
1701 decode_flags(node->
flags);
1716 static int votequorum_exec_send_qdevice_reconfigure(
const char *
oldname,
const char *
newname)
1719 struct iovec iov[1];
1742 static int votequorum_exec_send_qdevice_reg(uint32_t
operation,
const char *qdevice_name_req)
1745 struct iovec iov[1];
1766 static int votequorum_exec_send_quorum_notification(
void *conn, uint64_t context)
1772 int cluster_members = 0;
1784 if (us->
flags & NODE_FLAGS_QDEVICE_REGISTERED) {
1791 res_lib_votequorum_notification->quorate = cluster_is_quorate;
1792 res_lib_votequorum_notification->context = context;
1793 res_lib_votequorum_notification->node_list_entries = cluster_members;
1795 res_lib_votequorum_notification->header.size = size;
1796 res_lib_votequorum_notification->header.error =
CS_OK;
1801 res_lib_votequorum_notification->node_list[i].nodeid = node->
node_id;
1802 res_lib_votequorum_notification->node_list[i++].state = node->
state;
1804 if (us->
flags & NODE_FLAGS_QDEVICE_REGISTERED) {
1806 res_lib_votequorum_notification->node_list[i++].state = qdevice->
state;
1829 static int votequorum_exec_send_nodelist_notification(
void *conn, uint64_t context)
1844 res_lib_votequorum_notification->node_list_entries = quorum_members_entries;
1845 res_lib_votequorum_notification->ring_id.nodeid = quorum_ringid.
rep.
nodeid;
1846 res_lib_votequorum_notification->ring_id.seq = quorum_ringid.
seq;
1847 res_lib_votequorum_notification->context = context;
1849 for (i=0; i<quorum_members_entries; i++) {
1850 res_lib_votequorum_notification->node_list[i] = quorum_members[i];
1854 res_lib_votequorum_notification->header.size = size;
1855 res_lib_votequorum_notification->header.error =
CS_OK;
1877 static void votequorum_exec_send_expectedvotes_notification(
void)
1902 static void exec_votequorum_qdevice_reconfigure_endian_convert (
void *message)
1909 static void message_handler_req_exec_votequorum_qdevice_reconfigure (
1910 const void *message,
1911 unsigned int nodeid)
1919 req_exec_quorum_qdevice_reconfigure->
oldname,
1920 req_exec_quorum_qdevice_reconfigure->
newname);
1922 if (!strcmp(req_exec_quorum_qdevice_reconfigure->
oldname, qdevice_name)) {
1925 strcpy(qdevice_name, req_exec_quorum_qdevice_reconfigure->
newname);
1936 static void exec_votequorum_qdevice_reg_endian_convert (
void *message)
1947 static void message_handler_req_exec_votequorum_qdevice_reg (
1948 const void *message,
1949 unsigned int nodeid)
1953 int wipe_qdevice_name = 1;
1964 switch(req_exec_quorum_qdevice_reg->
operation)
1968 if (!strlen(qdevice_name)) {
1970 strcpy(qdevice_name, req_exec_quorum_qdevice_reg->
qdevice_name);
1981 if (us->
flags & NODE_FLAGS_QDEVICE_REGISTERED) {
1989 if (!qdevice_reg_conn) {
1998 if (!strlen(qdevice_name)) {
1999 strcpy(qdevice_name, req_exec_quorum_qdevice_reg->
qdevice_name);
2005 if ((!strncmp(req_exec_quorum_qdevice_reg->
qdevice_name,
2009 votequorum_exec_send_nodeinfo(us->
node_id);
2012 "A new qdevice with different name (new: %s old: %s) is trying to register!",
2013 req_exec_quorum_qdevice_reg->
qdevice_name, qdevice_name);
2021 qdevice_reg_conn = NULL;
2027 (node->
flags & NODE_FLAGS_QDEVICE_REGISTERED)) {
2028 wipe_qdevice_name = 0;
2032 if (wipe_qdevice_name) {
2041 static void exec_votequorum_nodeinfo_endian_convert (
void *message)
2055 static void message_handler_req_exec_votequorum_nodeinfo (
2056 const void *message,
2057 unsigned int sender_nodeid)
2066 int allow_downgrade = 0;
2068 unsigned int nodeid = req_exec_quorum_nodeinfo->
nodeid;
2075 req_exec_quorum_nodeinfo->
votes,
2077 req_exec_quorum_nodeinfo->
flags);
2080 decode_flags(req_exec_quorum_nodeinfo->
flags);
2083 node = find_node_by_nodeid(nodeid);
2085 node = allocate_node(nodeid);
2100 old_votes = node->
votes;
2102 old_state = node->
state;
2103 old_flags = node->
flags;
2107 struct cluster_node *sender_node = find_node_by_nodeid(sender_nodeid);
2109 assert(sender_node != NULL);
2111 if ((!cluster_is_quorate) &&
2112 (sender_node->
flags & NODE_FLAGS_QUORATE)) {
2113 node->
votes = req_exec_quorum_nodeinfo->
votes;
2121 node->
flags = req_exec_quorum_nodeinfo->
flags;
2122 node->
votes = req_exec_quorum_nodeinfo->
votes;
2125 if (node->
flags & NODE_FLAGS_LEAVING) {
2127 allow_downgrade = 1;
2131 if ((!cluster_is_quorate) &&
2132 (node->
flags & NODE_FLAGS_QUORATE)) {
2133 allow_downgrade = 1;
2137 if (node->
flags & NODE_FLAGS_QUORATE || (ev_tracking)) {
2143 if ((last_man_standing) && (node->
votes > 1)) {
2145 "cluster nodes votes are set to 1. Disabling LMS.");
2146 last_man_standing = 0;
2147 if (last_man_standing_timer_set) {
2149 last_man_standing_timer_set = 0;
2156 (node->
flags & NODE_FLAGS_FIRST) ||
2157 (old_votes != node->
votes) ||
2159 (old_flags != node->
flags) ||
2160 (old_state != node->
state)) {
2161 recalculate_quorum(allow_downgrade, by_node);
2164 if ((wait_for_all) &&
2165 (!(node->
flags & NODE_FLAGS_WFASTATUS)) &&
2166 (node->
flags & NODE_FLAGS_QUORATE)) {
2167 update_wait_for_all_status(0);
2173 static void exec_votequorum_reconfigure_endian_convert (
void *message)
2185 static void message_handler_req_exec_votequorum_reconfigure (
2186 const void *message,
2187 unsigned int nodeid)
2195 nodeid, req_exec_quorum_reconfigure->
nodeid);
2197 switch(req_exec_quorum_reconfigure->
param)
2200 update_node_expected_votes(req_exec_quorum_reconfigure->
value);
2202 votequorum_exec_send_expectedvotes_notification();
2203 update_ev_barrier(req_exec_quorum_reconfigure->
value);
2207 recalculate_quorum(1, 0);
2211 node = find_node_by_nodeid(req_exec_quorum_reconfigure->
nodeid);
2216 node->
votes = req_exec_quorum_reconfigure->
value;
2217 recalculate_quorum(1, 0);
2221 update_wait_for_all_status(0);
2223 req_exec_quorum_reconfigure->
nodeid);
2224 recalculate_quorum(0, 0);
2233 static int votequorum_exec_exit_fn (
void)
2243 if (allow_downscale) {
2245 ret = votequorum_exec_send_nodeinfo(us->
node_id);
2248 if ((ev_tracking) && (ev_tracking_fd != -1)) {
2249 close(ev_tracking_fd);
2257 static void votequorum_set_icmap_ro_keys(
void)
2277 list_init(&cluster_members_list);
2278 list_init(&trackers_list);
2281 memset(cluster_nodes, 0,
sizeof(cluster_nodes));
2289 return ((
char *)
"Could not allocate node.");
2300 return ((
char *)
"Could not allocate node.");
2313 recalculate_quorum(0, 0);
2318 votequorum_set_icmap_ro_keys();
2323 votequorum_exec_add_config_notification();
2328 votequorum_exec_send_nodeinfo(us->
node_id);
2339 static void votequorum_last_man_standing_timer_fn(
void *arg)
2343 last_man_standing_timer_set = 0;
2344 if (cluster_is_quorate) {
2345 recalculate_quorum(1,1);
2351 static void votequorum_sync_init (
2352 const unsigned int *trans_list,
size_t trans_list_entries,
2353 const unsigned int *member_list,
size_t member_list_entries,
2363 sync_in_progress = 1;
2364 sync_nodeinfo_sent = 0;
2365 sync_wait_for_poll_or_timeout = 0;
2367 if (member_list_entries > 1) {
2368 us->
flags &= ~NODE_FLAGS_FIRST;
2377 for (i = 0; i < quorum_members_entries; i++) {
2379 for (j = 0; j < member_list_entries; j++) {
2380 if (quorum_members[i] == member_list[j]) {
2387 node = find_node_by_nodeid(quorum_members[i]);
2394 if (last_man_standing) {
2395 if (((member_list_entries >= quorum) && (left_nodes)) ||
2396 ((member_list_entries <= quorum) && (auto_tie_breaker !=
ATB_NONE) && (check_low_node_id_partition() == 1))) {
2397 if (last_man_standing_timer_set) {
2399 last_man_standing_timer_set = 0;
2401 corosync_api->
timer_add_duration((
unsigned long long)last_man_standing_window*1000000,
2402 NULL, votequorum_last_man_standing_timer_fn,
2403 &last_man_standing_timer);
2404 last_man_standing_timer_set = 1;
2408 memcpy(previous_quorum_members, quorum_members,
sizeof(
unsigned int) * quorum_members_entries);
2409 previous_quorum_members_entries = quorum_members_entries;
2411 memcpy(quorum_members, member_list,
sizeof(
unsigned int) * member_list_entries);
2412 quorum_members_entries = member_list_entries;
2413 memcpy(&quorum_ringid, ring_id,
sizeof(*ring_id));
2415 if (us->
flags & NODE_FLAGS_QDEVICE_REGISTERED && us->
flags & NODE_FLAGS_QDEVICE_ALIVE) {
2419 if (qdevice_timer_set) {
2422 corosync_api->
timer_add_duration((
unsigned long long)qdevice_sync_timeout*1000000, qdevice,
2423 qdevice_timer_fn, &qdevice_timer);
2424 qdevice_timer_set = 1;
2425 sync_wait_for_poll_or_timeout = 1;
2428 qdevice_name, qdevice_sync_timeout);
2434 static int votequorum_sync_process (
void)
2436 if (!sync_nodeinfo_sent) {
2437 votequorum_exec_send_nodeinfo(us->
node_id);
2439 if (strlen(qdevice_name)) {
2443 votequorum_exec_send_nodelist_notification(NULL, 0LL);
2444 sync_nodeinfo_sent = 1;
2447 if (us->
flags & NODE_FLAGS_QDEVICE_REGISTERED && sync_wait_for_poll_or_timeout) {
2458 static void votequorum_sync_activate (
void)
2460 recalculate_quorum(0, 0);
2461 quorum_callback(quorum_members, quorum_members_entries,
2462 cluster_is_quorate, &quorum_ringid);
2463 votequorum_exec_send_quorum_notification(NULL, 0L);
2465 sync_in_progress = 0;
2468 static void votequorum_sync_abort (
void)
2480 if (q_set_quorate_fn == NULL) {
2481 return ((
char *)
"Quorate function not set");
2485 quorum_callback = q_set_quorate_fn;
2488 &votequorum_service[0]);
2502 static int quorum_lib_init_fn (
void *conn)
2508 list_init (&pd->
list);
2515 static int quorum_lib_exit_fn (
void *conn)
2522 list_del (&quorum_pd->
list);
2523 list_init (&quorum_pd->
list);
2535 static void qdevice_timer_fn(
void *arg)
2539 if ((!(us->
flags & NODE_FLAGS_QDEVICE_ALIVE)) ||
2540 (!qdevice_timer_set)) {
2545 us->
flags &= ~NODE_FLAGS_QDEVICE_ALIVE;
2548 votequorum_exec_send_nodeinfo(us->
node_id);
2550 qdevice_timer_set = 0;
2551 sync_wait_for_poll_or_timeout = 0;
2560 static void message_handler_req_lib_votequorum_getinfo (
void *conn,
const void *message)
2565 unsigned int highest_expected = 0;
2566 unsigned int total_votes = 0;
2568 uint32_t nodeid = req_lib_votequorum_getinfo->
nodeid;
2578 node = find_node_by_nodeid(nodeid);
2589 total_votes += iternode->
votes;
2594 total_votes += qdevice->
votes;
2597 switch(node->
state) {
2624 if (cluster_is_quorate) {
2630 if (last_man_standing) {
2633 if (auto_tie_breaker !=
ATB_NONE) {
2636 if (allow_downscale) {
2644 if (node->
flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2647 if (node->
flags & NODE_FLAGS_QDEVICE_ALIVE) {
2669 static void message_handler_req_lib_votequorum_setexpected (
void *conn,
const void *message)
2674 unsigned int newquorum;
2675 unsigned int total_votes;
2676 uint8_t allow_downscale_status = 0;
2680 allow_downscale_status = allow_downscale;
2681 allow_downscale = 0;
2686 newquorum = calculate_quorum(1, req_lib_votequorum_setexpected->
expected_votes, &total_votes);
2687 allow_downscale = allow_downscale_status;
2692 if (req_lib_votequorum_setexpected->
expected_votes < total_votes ||
2693 (cluster_is_quorate && (newquorum > total_votes))) {
2697 update_node_expected_votes(req_lib_votequorum_setexpected->
expected_votes);
2711 static void message_handler_req_lib_votequorum_setvotes (
void *conn,
const void *message)
2716 unsigned int newquorum;
2717 unsigned int total_votes;
2718 unsigned int saved_votes;
2724 nodeid = req_lib_votequorum_setvotes->
nodeid;
2725 node = find_node_by_nodeid(nodeid);
2734 saved_votes = node->
votes;
2735 node->
votes = req_lib_votequorum_setvotes->
votes;
2737 newquorum = calculate_quorum(1, 0, &total_votes);
2739 if (newquorum < total_votes / 2 ||
2740 newquorum > total_votes) {
2741 node->
votes = saved_votes;
2747 req_lib_votequorum_setvotes->
votes);
2758 static void message_handler_req_lib_votequorum_trackstart (
void *conn,
2759 const void *message)
2775 votequorum_exec_send_nodelist_notification(conn, req_lib_votequorum_trackstart->
context);
2776 votequorum_exec_send_quorum_notification(conn, req_lib_votequorum_trackstart->
context);
2794 list_add (&quorum_pd->
list, &trackers_list);
2806 static void message_handler_req_lib_votequorum_trackstop (
void *conn,
2807 const void *message)
2818 list_del (&quorum_pd->
list);
2819 list_init (&quorum_pd->
list);
2832 static void message_handler_req_lib_votequorum_qdevice_register (
void *conn,
2833 const void *message)
2841 if (!qdevice_can_operate) {
2842 log_printf(
LOGSYS_LEVEL_INFO,
"Registration of quorum device is disabled by incorrect corosync.conf. See logs for more information");
2847 if (us->
flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2848 if ((!strncmp(req_lib_votequorum_qdevice_register->
name,
2853 "A new qdevice with different name (new: %s old: %s) is trying to re-register!",
2854 req_lib_votequorum_qdevice_register->
name, qdevice_name);
2859 if (qdevice_reg_conn != NULL) {
2861 "Registration request already in progress");
2865 qdevice_reg_conn = conn;
2867 req_lib_votequorum_qdevice_register->
name) != 0) {
2869 "Unable to send qdevice registration request to cluster");
2871 qdevice_reg_conn = NULL;
/*
 * Handler for the library "qdevice unregister" request. When the local node
 * (us) has NODE_FLAGS_QDEVICE_REGISTERED set, the visible lines reset the
 * qdevice timer bookkeeping, clear the REGISTERED and ALIVE flags on the
 * local node and send the local node's info with
 * votequorum_exec_send_nodeinfo().
 *
 * conn    - IPC connection the request arrived on
 * message - request payload, read through
 *           req_lib_votequorum_qdevice_unregister
 *
 * NOTE(review): block nesting, name checks, error codes and the reply on
 * conn are not visible in this listing.
 */
2888 static void message_handler_req_lib_votequorum_qdevice_unregister (
void *conn,
2889 const void *message)
/* Nothing below applies unless a device is registered on this node. */
2897 if (us->
flags & NODE_FLAGS_QDEVICE_REGISTERED) {
/* Reset the timer state kept for the device. */
2902 if (qdevice_timer_set) {
2904 qdevice_timer_set = 0;
2905 sync_wait_for_poll_or_timeout = 0;
/* Mark the device as neither registered nor alive on this node. */
2907 us->
flags &= ~NODE_FLAGS_QDEVICE_REGISTERED;
2908 us->
flags &= ~NODE_FLAGS_QDEVICE_ALIVE;
/* Send this node's updated info. */
2911 votequorum_exec_send_nodeinfo(us->
node_id);
/*
 * NOTE(review): the next line is the last argument of a call whose opening
 * line is missing from this listing.
 */
2913 req_lib_votequorum_qdevice_unregister->
name);
/*
 * Handler for the library "qdevice update" request. When the local node (us)
 * has NODE_FLAGS_QDEVICE_REGISTERED set, the visible code passes the old and
 * new device names from the request to
 * votequorum_exec_send_qdevice_reconfigure().
 *
 * conn    - IPC connection the request arrived on
 * message - request payload, read through req_lib_votequorum_qdevice_update
 *
 * NOTE(review): any name validation, error codes and the reply on conn are
 * not visible in this listing.
 */
2927 static void message_handler_req_lib_votequorum_qdevice_update (
void *conn,
2928 const void *message)
/* A rename only makes sense while a device is registered on this node. */
2936 if (us->
flags & NODE_FLAGS_QDEVICE_REGISTERED) {
2941 votequorum_exec_send_qdevice_reconfigure(req_lib_votequorum_qdevice_update->
oldname,
2942 req_lib_votequorum_qdevice_update->
newname);
/*
 * Handler for the library "qdevice poll" request.
 *
 * Visible behaviour:
 *  - checks qdevice_can_operate and NODE_FLAGS_QDEVICE_REGISTERED on the
 *    local node (us);
 *  - compares the ring id in the request (nodeid and seq) with quorum_ringid
 *    and logs that the poll call is ignored when they differ;
 *  - clears qdevice_timer_set when it was set;
 *  - snapshots us->flags, branches on the request's cast_vote field, and
 *    sends the local node's info only if the flags changed;
 *  - arms a timer through corosync_api->timer_add_duration() with
 *    qdevice_timer_fn as the callback, then sets qdevice_timer_set and
 *    clears sync_wait_for_poll_or_timeout.
 *
 * conn    - IPC connection the request arrived on
 * message - request payload, read through req_lib_votequorum_qdevice_poll
 *
 * NOTE(review): the bodies of several branches (including the flag updates
 * and any timer deletion), error codes and the reply on conn are not visible
 * in this listing.
 */
2956 static void message_handler_req_lib_votequorum_qdevice_poll (
void *conn,
2957 const void *message)
2966 if (!qdevice_can_operate) {
2971 if (us->
flags & NODE_FLAGS_QDEVICE_REGISTERED) {
/* The poll must carry the same ring id (nodeid and seq) as quorum_ringid. */
2972 if (!(req_lib_votequorum_qdevice_poll->ring_id.nodeid == quorum_ringid.
rep.
nodeid &&
2973 req_lib_votequorum_qdevice_poll->ring_id.seq == quorum_ringid.
seq)) {
2975 "ring id (%u.%"PRIu64
"). Ignoring poll call.",
2976 req_lib_votequorum_qdevice_poll->ring_id.nodeid, req_lib_votequorum_qdevice_poll->ring_id.seq,
/* Clear the "timer armed" marker before the timer is set up again below. */
2986 if (qdevice_timer_set) {
2988 qdevice_timer_set = 0;
/* Snapshot the flags so a change can be detected afterwards. */
2991 oldflags = us->
flags;
2995 if (req_lib_votequorum_qdevice_poll->
cast_vote) {
/* Node info is only sent when the flags actually changed. */
3001 if (us->
flags != oldflags) {
3002 votequorum_exec_send_nodeinfo(us->
node_id);
/*
 * Arm the qdevice timer. timer_add_duration() takes nanoseconds, so with the
 * 1000000 multiplier qdevice_timeout is presumably in milliseconds - TODO
 * confirm against where qdevice_timeout is set.
 */
3005 corosync_api->
timer_add_duration((
unsigned long long)qdevice_timeout*1000000, qdevice,
3006 qdevice_timer_fn, &qdevice_timer);
3007 qdevice_timer_set = 1;
3008 sync_wait_for_poll_or_timeout = 0;
/*
 * Handler for the library "qdevice master wins" request.
 *
 * Visible behaviour: snapshots the local node's flags, checks
 * qdevice_can_operate and NODE_FLAGS_QDEVICE_REGISTERED, branches on the
 * request's allow field, sends the local node's info if the flags changed,
 * and passes allow to update_qdevice_master_wins().
 *
 * conn    - IPC connection the request arrived on
 * message - request payload, read through
 *           req_lib_votequorum_qdevice_master_wins
 *
 * NOTE(review): the flag updates inside the allow branch, error codes and
 * the reply on conn are not visible in this listing.
 */
3022 static void message_handler_req_lib_votequorum_qdevice_master_wins (
void *conn,
3023 const void *message)
/* Snapshot the flags so a change can be detected afterwards. */
3028 uint32_t oldflags = us->
flags;
3032 if (!qdevice_can_operate) {
3037 if (us->
flags & NODE_FLAGS_QDEVICE_REGISTERED) {
3043 if (req_lib_votequorum_qdevice_master_wins->
allow) {
/* Node info is only sent when the flags actually changed. */
3049 if (us->
flags != oldflags) {
3050 votequorum_exec_send_nodeinfo(us->
node_id);
/* Hand the requested setting to update_qdevice_master_wins(). */
3053 update_qdevice_master_wins(req_lib_votequorum_qdevice_master_wins->
allow);
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
void *(* ipc_private_data_get)(void *conn)
#define VOTEQUORUM_INFO_QUORATE
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define VOTEQUORUM_READCONFIG_STARTUP
const char * get_run_dir(void)
void(* timer_delete)(corosync_timer_handle_t timer_handle)
int(* timer_add_duration)(unsigned long long nanoseconds_in_future, void *data, void(*timer_nf)(void *data), corosync_timer_handle_t *handle)
const char * icmap_iter_next(icmap_iter_t iter, size_t *value_len, icmap_value_types_t *type)
Return next item in iterator iter.
#define NODE_FLAGS_WFASTATUS
#define LOGSYS_LEVEL_INFO
#define NODE_FLAGS_QUORATE
#define VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT
The corosync_service_engine struct.
void icmap_iter_finalize(icmap_iter_t iter)
Finalize iterator.
The req_lib_votequorum_qdevice_master_wins struct.
#define VOTEQUORUM_QDEVICE_OPERATION_UNREGISTER
#define MESSAGE_REQ_EXEC_VOTEQUORUM_RECONFIGURE
int(* ipc_response_send)(void *conn, const void *msg, size_t mlen)
#define list_iterate(v, head)
char * votequorum_init(struct corosync_api_v1 *api, quorum_set_quorate_fn_t q_set_quorate_fn)
#define VOTEQUORUM_RECONFIG_PARAM_CANCEL_WFA
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
The req_lib_votequorum_qdevice_unregister struct.
#define NODE_FLAGS_QDEVICE_MASTER_WINS
The res_lib_votequorum_quorum_notification struct.
The corosync_lib_handler struct.
unsigned int highest_expected
#define VOTEQUORUM_INFO_LAST_MAN_STANDING
struct message_header header
#define VOTEQUORUM_INFO_WAIT_FOR_ALL
#define NODE_FLAGS_QDEVICE_CAST_VOTE
The res_lib_votequorum_status struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_RECONFIGURE
The corosync_exec_handler struct.
#define VOTEQUORUM_INFO_TWONODE
int(* totem_mcast)(const struct iovec *iovec, unsigned int iov_len, unsigned int guarantee)
char qdevice_name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define VOTEQUORUM_INFO_QDEVICE_REGISTERED
#define log_printf(level, format, args...)
unsigned int expected_votes
void(* exec_handler_fn)(const void *msg, unsigned int nodeid)
#define VOTEQUORUM_QDEVICE_NODEID
#define VOTEQUORUM_INFO_QDEVICE_MASTER_WINS
#define VOTEQUORUM_NODESTATE_MEMBER
#define SERVICE_ID_MAKE(a, b)
#define ICMAP_TRACK_DELETE
#define ICMAP_KEYNAME_MAXLEN
Maximum length of key in icmap.
void(* quorum_set_quorate_fn_t)(const unsigned int *view_list, size_t view_list_entries, int quorate, struct memb_ring_id *)
#define VOTEQUORUM_QDEVICE_OPERATION_REGISTER
cs_error_t icmap_get_uint8(const char *key_name, uint8_t *u8)
void(* error_memory_failure)(void) __attribute__((noreturn))
#define VOTEQUORUM_INFO_ALLOW_DOWNSCALE
#define LOGSYS_LEVEL_WARNING
#define ICMAP_TRACK_MODIFY
#define VOTEQUORUM_INFO_QDEVICE_ALIVE
cs_error_t icmap_set_uint32(const char *key_name, uint32_t value)
unsigned int(* totem_nodeid_get)(void)
#define CS_TRACK_CHANGES_ONLY
The req_lib_votequorum_getinfo struct.
char name[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
struct totem_ip_address rep
#define COROSYNC_LIB_FLOW_CONTROL_NOT_REQUIRED
The req_lib_votequorum_qdevice_update struct.
cs_error_t
The cs_error_t enum.
unsigned char track_flags
#define LOGSYS_LEVEL_DEBUG
LOGSYS_DECLARE_SUBSYS("VOTEQ")
The req_lib_votequorum_setvotes struct.
The corosync_api_v1 struct.
cs_error_t icmap_get_uint32(const char *key_name, uint32_t *u32)
The req_lib_votequorum_setexpected struct.
#define swab32(x)
The swab32 macro.
#define VOTEQUORUM_INFO_AUTO_TIE_BREAKER
struct corosync_service_engine * votequorum_get_service_engine_ver0(void)
The res_lib_votequorum_expectedvotes_notification struct.
The req_lib_votequorum_qdevice_register struct.
char * corosync_service_link_and_init(struct corosync_api_v1 *corosync_api, struct default_service *service)
Link and initialize a service.
char newname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define VOTEQUORUM_NODESTATE_LEAVING
The votequorum_node struct.
#define PROCESSOR_COUNT_MAX
unsigned int qdevice_votes
#define MESSAGE_REQ_EXEC_VOTEQUORUM_QDEVICE_REG
#define VOTEQUORUM_READCONFIG_RUNTIME
#define MESSAGE_REQ_EXEC_VOTEQUORUM_NODEINFO
The req_lib_votequorum_trackstart struct.
#define VOTEQUORUM_RECONFIG_PARAM_NODE_VOTES
#define VOTEQUORUM_QDEVICE_MAX_NAME_LEN
qb_loop_timer_handle corosync_timer_handle_t
corosync_timer_handle_t
The req_lib_votequorum_qdevice_poll struct.
cs_error_t icmap_get_string(const char *key_name, char **str)
Shortcut for icmap_get for string type.
#define LOGSYS_LEVEL_CRIT
char oldname[VOTEQUORUM_QDEVICE_MAX_NAME_LEN]
#define NODE_FLAGS_LEAVING
#define list_entry(ptr, type, member)
#define COROSYNC_LIB_FLOW_CONTROL_REQUIRED
#define LOGSYS_LEVEL_NOTICE
cs_error_t icmap_set_uint8(const char *key_name, uint8_t value)
void(* lib_handler_fn)(void *conn, const void *msg)
The res_lib_votequorum_getinfo struct.
#define VOTEQUORUM_NODESTATE_DEAD
cs_error_t icmap_set_ro_access(const char *key_name, int prefix, int ro_access)
Set read-only access for given key (key_name) or prefix, if prefix is set.
#define VOTEQUORUM_INFO_QDEVICE_CAST_VOTE
int(* ipc_dispatch_send)(void *conn, const void *msg, size_t mlen)
#define VOTEQUORUM_QDEVICE_DEFAULT_TIMEOUT
icmap_iter_t icmap_iter_init(const char *prefix)
Initialize iterator with given prefix.
struct memb_ring_id ring_id
uint64_t tracking_context
#define VOTEQUORUM_RECONFIG_PARAM_EXPECTED_VOTES
#define NODE_FLAGS_QDEVICE_ALIVE
qb_map_iter_t * icmap_iter_t
Iterator type.
Structure passed as new_value and old_value in change callback.
#define NODE_FLAGS_QDEVICE_REGISTERED
cs_error_t icmap_track_add(const char *key_name, int32_t track_type, icmap_notify_fn_t notify_fn, void *user_data, icmap_track_t *icmap_track)
Add tracking function for given key_name.
struct qb_ipc_request_header header __attribute__((aligned(8)))
#define ICMAP_TRACK_PREFIX
Whole prefix is tracked, instead of key only (so "totem." tracking means that "totem.nodeid", "totem.version", ...
unsigned int expected_votes