40 #include <sys/types.h>
42 #include <sys/socket.h>
45 #include <sys/ioctl.h>
46 #include <sys/param.h>
47 #include <netinet/in.h>
48 #include <arpa/inet.h>
62 #include <sys/types.h>
63 #include <sys/socket.h>
65 #include <rdma/rdma_cma.h>
74 #include <qb/qbdefs.h>
75 #include <qb/qbloop.h>
76 #define LOGSYS_UTILS_ONLY 1
80 #define COMPLETION_QUEUE_ENTRIES 100
82 #define TOTAL_READ_POSTS 100
84 #define MAX_MTU_SIZE 4096
110 unsigned int msg_len);
194 const char *
function,
218 #define log_printf(level, format, args...) \
220 instance->totemiba_log_printf ( \
222 instance->totemiba_subsys_id, \
223 __FUNCTION__, __FILE__, __LINE__, \
224 (const char *)format, ##args); \
243 void2wrid (
void *v) {
union u u; u.
v =
v;
return u.
wr_id; }
258 static inline struct send_buf *mcast_send_buf_get (
269 send_buf = malloc (
sizeof (
struct send_buf));
270 if (send_buf == NULL) {
273 send_buf->
mr = ibv_reg_mr (instance->
mcast_pd,
275 2048, IBV_ACCESS_LOCAL_WRITE);
276 if (send_buf->
mr == NULL) {
287 static inline void mcast_send_buf_put (
289 struct send_buf *send_buf)
295 static inline struct send_buf *token_send_buf_get (
298 struct send_buf *send_buf;
306 send_buf = malloc (
sizeof (
struct send_buf));
307 if (send_buf == NULL) {
312 2048, IBV_ACCESS_LOCAL_WRITE);
313 if (send_buf->
mr == NULL) {
327 struct send_buf *send_buf;
332 ibv_dereg_mr (send_buf->
mr);
340 static inline void token_send_buf_put (
342 struct send_buf *send_buf)
348 static inline struct recv_buf *recv_token_recv_buf_create (
353 recv_buf = malloc (
sizeof (
struct recv_buf));
354 if (recv_buf == NULL) {
360 IBV_ACCESS_LOCAL_WRITE);
365 recv_buf->
recv_wr.wr_id = (uintptr_t)recv_buf;
367 recv_buf->
sge.length = 2048;
368 recv_buf->
sge.lkey = recv_buf->
mr->lkey;
369 recv_buf->
sge.addr = (uintptr_t)recv_buf->
buffer;
376 static inline int recv_token_recv_buf_post (
struct totemiba_instance *instance,
struct recv_buf *recv_buf)
378 struct ibv_recv_wr *fail_recv;
386 static inline void recv_token_recv_buf_post_initial (
struct totemiba_instance *instance)
388 struct recv_buf *recv_buf;
392 recv_buf = recv_token_recv_buf_create (instance);
394 recv_token_recv_buf_post (instance, recv_buf);
398 static inline void recv_token_recv_buf_post_destroy (
401 struct recv_buf *recv_buf;
407 recv_buf =
list_entry (list,
struct recv_buf, list_all);
409 ibv_dereg_mr (recv_buf->
mr);
415 static inline struct recv_buf *mcast_recv_buf_create (
struct totemiba_instance *instance)
417 struct recv_buf *recv_buf;
420 recv_buf = malloc (
sizeof (
struct recv_buf));
421 if (recv_buf == NULL) {
427 IBV_ACCESS_LOCAL_WRITE);
432 recv_buf->
recv_wr.wr_id = (uintptr_t)recv_buf;
434 recv_buf->
sge.length = 2048;
435 recv_buf->
sge.lkey = mr->lkey;
436 recv_buf->
sge.addr = (uintptr_t)recv_buf->
buffer;
441 static inline int mcast_recv_buf_post (
struct totemiba_instance *instance,
struct recv_buf *recv_buf)
443 struct ibv_recv_wr *fail_recv;
451 static inline void mcast_recv_buf_post_initial (
struct totemiba_instance *instance)
453 struct recv_buf *recv_buf;
457 recv_buf = mcast_recv_buf_create (instance);
459 mcast_recv_buf_post (instance, recv_buf);
463 static inline void iba_deliver_fn (
struct totemiba_instance *instance, uint64_t wr_id, uint32_t bytes)
466 const struct recv_buf *recv_buf;
468 recv_buf = wrid2void(wr_id);
469 addr = &recv_buf->
buffer[
sizeof (
struct ibv_grh)];
474 static int mcast_cq_send_event_fn (
int events,
int suck,
void *context)
477 struct ibv_wc wc[32];
478 struct ibv_cq *ev_cq;
484 ibv_ack_cq_events (ev_cq, 1);
485 res = ibv_req_notify_cq (ev_cq, 0);
489 for (i = 0; i < res; i++) {
490 mcast_send_buf_put (instance, wrid2void(wc[i].wr_id));
497 static int mcast_cq_recv_event_fn (
int events,
int suck,
void *context)
500 struct ibv_wc wc[64];
501 struct ibv_cq *ev_cq;
507 ibv_ack_cq_events (ev_cq, 1);
508 res = ibv_req_notify_cq (ev_cq, 0);
512 for (i = 0; i < res; i++) {
513 iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
514 mcast_recv_buf_post (instance, wrid2void(wc[i].wr_id));
521 static int mcast_rdma_event_fn (
int events,
int suck,
void *context)
524 struct rdma_cm_event *event;
533 switch (event->event) {
537 case RDMA_CM_EVENT_ADDR_RESOLVED:
544 case RDMA_CM_EVENT_MULTICAST_JOIN:
545 instance->
mcast_qpn =
event->param.ud.qp_num;
547 instance->
mcast_ah = ibv_create_ah (instance->
mcast_pd, &event->param.ud.ah_attr);
551 case RDMA_CM_EVENT_ADDR_ERROR:
552 case RDMA_CM_EVENT_ROUTE_ERROR:
553 case RDMA_CM_EVENT_MULTICAST_ERROR:
556 case RDMA_CM_EVENT_DEVICE_REMOVAL:
563 rdma_ack_cm_event (event);
567 static int recv_token_cq_send_event_fn (
573 struct ibv_wc wc[32];
574 struct ibv_cq *ev_cq;
580 ibv_ack_cq_events (ev_cq, 1);
581 res = ibv_req_notify_cq (ev_cq, 0);
585 for (i = 0; i < res; i++) {
586 iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
587 ibv_dereg_mr (wrid2void(wc[i].wr_id));
594 static int recv_token_cq_recv_event_fn (
int events,
int suck,
void *context)
597 struct ibv_wc wc[32];
598 struct ibv_cq *ev_cq;
604 ibv_ack_cq_events (ev_cq, 1);
605 res = ibv_req_notify_cq (ev_cq, 0);
609 for (i = 0; i < res; i++) {
610 iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
611 recv_token_recv_buf_post (instance, wrid2void(wc[i].wr_id));
626 recv_token_recv_buf_post_destroy (instance);
653 struct ibv_qp_init_attr init_qp_attr;
710 memset (&init_qp_attr, 0,
sizeof (
struct ibv_qp_init_attr));
711 init_qp_attr.cap.max_send_wr = 50;
713 init_qp_attr.cap.max_send_sge = 1;
714 init_qp_attr.cap.max_recv_sge = 1;
715 init_qp_attr.qp_context = instance;
716 init_qp_attr.sq_sig_all = 0;
717 init_qp_attr.qp_type = IBV_QPT_UD;
727 recv_token_recv_buf_post_initial (instance);
733 POLLIN, instance, recv_token_cq_recv_event_fn);
739 POLLIN, instance, recv_token_cq_send_event_fn);
746 static int recv_token_rdma_event_fn (
int events,
int suck,
void *context)
749 struct rdma_cm_event *event;
750 struct rdma_conn_param conn_param;
759 switch (event->event) {
760 case RDMA_CM_EVENT_CONNECT_REQUEST:
761 recv_token_accept_destroy (instance);
764 recv_token_accept_setup (instance);
765 memset (&conn_param, 0,
sizeof (
struct rdma_conn_param));
774 res = rdma_ack_cm_event (event);
778 static int send_token_cq_send_event_fn (
int events,
int suck,
void *context)
781 struct ibv_wc wc[32];
782 struct ibv_cq *ev_cq;
788 ibv_ack_cq_events (ev_cq, 1);
789 res = ibv_req_notify_cq (ev_cq, 0);
793 for (i = 0; i < res; i++) {
794 token_send_buf_put (instance, wrid2void(wc[i].wr_id));
801 static int send_token_cq_recv_event_fn (
int events,
int suck,
void *context)
804 struct ibv_wc wc[32];
805 struct ibv_cq *ev_cq;
811 ibv_ack_cq_events (ev_cq, 1);
812 res = ibv_req_notify_cq (ev_cq, 0);
816 for (i = 0; i < res; i++) {
817 iba_deliver_fn (instance, wc[i].wr_id, wc[i].byte_len);
824 static int send_token_rdma_event_fn (
int events,
int suck,
void *context)
827 struct rdma_cm_event *event;
828 struct rdma_conn_param conn_param;
837 switch (event->event) {
841 case RDMA_CM_EVENT_ADDR_RESOLVED:
847 case RDMA_CM_EVENT_ROUTE_RESOLVED:
848 memset (&conn_param, 0,
sizeof (
struct rdma_conn_param));
849 conn_param.private_data = NULL;
850 conn_param.private_data_len = 0;
853 case RDMA_CM_EVENT_ESTABLISHED:
860 case RDMA_CM_EVENT_ADDR_ERROR:
861 case RDMA_CM_EVENT_ROUTE_ERROR:
862 case RDMA_CM_EVENT_MULTICAST_ERROR:
864 "send_token_rdma_event_fn multicast error");
866 case RDMA_CM_EVENT_DEVICE_REMOVAL:
868 case RDMA_CM_EVENT_UNREACHABLE:
870 "send_token_rdma_event_fn unreachable");
874 "send_token_rdma_event_fn unknown event %d",
879 rdma_ack_cm_event (event);
886 struct ibv_qp_init_attr init_qp_attr;
945 "couldn't request notifications of the completion queue");
975 "couldn't request notifications of the completion queue");
978 memset (&init_qp_attr, 0,
sizeof (
struct ibv_qp_init_attr));
979 init_qp_attr.cap.max_send_wr = 50;
981 init_qp_attr.cap.max_send_sge = 1;
982 init_qp_attr.cap.max_recv_sge = 1;
983 init_qp_attr.qp_context = instance;
984 init_qp_attr.sq_sig_all = 0;
985 init_qp_attr.qp_type = IBV_QPT_UD;
999 POLLIN, instance, send_token_cq_recv_event_fn);
1005 POLLIN, instance, send_token_cq_send_event_fn);
1011 POLLIN, instance, send_token_rdma_event_fn);
1044 token_send_buf_destroy (instance);
1088 POLLIN, instance, recv_token_rdma_event_fn);
1096 struct ibv_qp_init_attr init_qp_attr;
1180 memset (&init_qp_attr, 0,
sizeof (
struct ibv_qp_init_attr));
1181 init_qp_attr.cap.max_send_wr = 50;
1183 init_qp_attr.cap.max_send_sge = 1;
1184 init_qp_attr.cap.max_recv_sge = 1;
1185 init_qp_attr.qp_context = instance;
1186 init_qp_attr.sq_sig_all = 0;
1187 init_qp_attr.qp_type = IBV_QPT_UD;
1197 mcast_recv_buf_post_initial (instance);
1203 POLLIN, instance, mcast_cq_recv_event_fn);
1209 POLLIN, instance, mcast_cq_send_event_fn);
1215 POLLIN, instance, mcast_rdma_event_fn);
1219 static void timer_function_netif_check_timeout (
1248 (
const struct sockaddr_storage *)&instance->
bind_addr,
1256 (
struct sockaddr_storage *)&instance->
mcast_addr, &addr_len);
1258 res = recv_token_bind (instance);
1260 res = mcast_bind (instance);
1265 const char *cipher_type,
1266 const char *hash_type)
1291 qb_loop_t *qb_poll_handle,
1298 void (*deliver_fn) (
1301 unsigned int msg_len),
1303 void (*iface_change_fn) (
1307 void (*target_set_completed) (
1314 if (instance == NULL) {
1318 totemiba_instance_initialize (instance);
1339 100*QB_TIME_NS_IN_MSEC,
1341 timer_function_netif_check_timeout,
1347 *iba_context = instance;
1363 int processor_count)
1396 unsigned int msg_len)
1400 struct ibv_send_wr send_wr, *failed_send_wr;
1403 struct send_buf *send_buf;
1405 send_buf = token_send_buf_get (instance);
1406 if (send_buf == NULL) {
1410 memcpy (msg, ms, msg_len);
1412 send_wr.next = NULL;
1413 send_wr.sg_list = &sge;
1414 send_wr.num_sge = 1;
1415 send_wr.opcode = IBV_WR_SEND;
1416 send_wr.send_flags = IBV_SEND_SIGNALED;
1417 send_wr.wr_id = void2wrid(send_buf);
1418 send_wr.imm_data = 0;
1423 sge.length = msg_len;
1424 sge.lkey = send_buf->
mr->lkey;
1425 sge.addr = (uintptr_t)msg;
1428 res = ibv_post_send (instance->
send_token_cma_id->qp, &send_wr, &failed_send_wr);
1436 unsigned int msg_len)
1440 struct ibv_send_wr send_wr, *failed_send_wr;
1443 struct send_buf *send_buf;
1445 send_buf = mcast_send_buf_get (instance);
1446 if (send_buf == NULL) {
1451 memcpy (msg, ms, msg_len);
1452 send_wr.next = NULL;
1453 send_wr.sg_list = &sge;
1454 send_wr.num_sge = 1;
1455 send_wr.opcode = IBV_WR_SEND;
1456 send_wr.send_flags = IBV_SEND_SIGNALED;
1457 send_wr.wr_id = void2wrid(send_buf);
1458 send_wr.imm_data = 0;
1459 send_wr.wr.ud.ah = instance->
mcast_ah;
1460 send_wr.wr.ud.remote_qpn = instance->
mcast_qpn;
1461 send_wr.wr.ud.remote_qkey = instance->
mcast_qkey;
1463 sge.length = msg_len;
1464 sge.lkey = send_buf->
mr->lkey;
1465 sge.addr = (uintptr_t)msg;
1467 res = ibv_post_send (instance->
mcast_cma_id->qp, &send_wr, &failed_send_wr);
1474 unsigned int msg_len)
1478 struct ibv_send_wr send_wr, *failed_send_wr;
1481 struct send_buf *send_buf;
1483 send_buf = mcast_send_buf_get (instance);
1484 if (send_buf == NULL) {
1489 memcpy (msg, ms, msg_len);
1490 send_wr.next = NULL;
1491 send_wr.sg_list = &sge;
1492 send_wr.num_sge = 1;
1493 send_wr.opcode = IBV_WR_SEND;
1494 send_wr.send_flags = IBV_SEND_SIGNALED;
1495 send_wr.wr_id = void2wrid(send_buf);
1496 send_wr.imm_data = 0;
1497 send_wr.wr.ud.ah = instance->
mcast_ah;
1498 send_wr.wr.ud.remote_qpn = instance->
mcast_qpn;
1499 send_wr.wr.ud.remote_qkey = instance->
mcast_qkey;
1501 sge.length = msg_len;
1502 sge.lkey = send_buf->
mr->lkey;
1503 sge.addr = (uintptr_t)msg;
1505 res = ibv_post_send (instance->
mcast_cma_id->qp, &send_wr, &failed_send_wr);
1528 const char *ret_char;
1559 res = send_token_unbind (instance);
1561 res = send_token_bind (instance);
unsigned int clear_node_high_bit
struct ibv_cq * mcast_recv_cq
struct list_head mcast_send_buf_head
struct sockaddr mcast_addr
struct ibv_comp_channel * send_token_recv_completion_channel
struct ibv_cq * recv_token_send_cq
int totemiba_recv_mcast_empty(void *iba_context)
struct totem_interface * interfaces
struct ibv_comp_channel * send_token_send_completion_channel
struct ibv_cq * send_token_recv_cq
const char * totemip_print(const struct totem_ip_address *addr)
struct sockaddr send_token_bind_addr
struct sockaddr local_mcast_bind_addr
int totemiba_send_flush(void *iba_context)
int totemiba_token_target_set(void *iba_context, const struct totem_ip_address *token_target)
char buffer[MAX_MTU_SIZE]
struct sockaddr token_addr
struct rdma_event_channel * send_token_channel
struct rdma_event_channel * recv_token_channel
void(* totemiba_iface_change_fn)(void *context, const struct totem_ip_address *iface_address)
struct ibv_comp_channel * mcast_recv_completion_channel
int totemiba_initialize(qb_loop_t *qb_poll_handle, void **iba_context, struct totem_config *totem_config, totemsrp_stats_t *stats, int interface_no, void *context, void(*deliver_fn)(void *context, const void *msg, unsigned int msg_len), void(*iface_change_fn)(void *context, const struct totem_ip_address *iface_address), void(*target_set_completed)(void *context))
Create an instance.
int totemiba_mcast_flush_send(void *iba_context, const void *ms, unsigned int msg_len)
unsigned char addr[TOTEMIP_ADDRLEN]
void * totemiba_buffer_alloc(void)
void(* totemiba_deliver_fn)(void *context, const void *msg, unsigned int msg_len)
struct list_head list_all
int totemiba_iface_check(void *iba_context)
struct list_head token_send_buf_free
struct list_head recv_token_recv_buf_head
char buffer[MAX_MTU_SIZE]
void(*) in totemiba_subsys_id)
void(* totemiba_log_printf)(int level, int subsys, const char *function, const char *file, int line, const char *format,...) __attribute__((format(printf
qb_loop_timer_handle timer_netif_check_timeout
struct totem_interface * totem_interface
int totemiba_crypto_set(void *iba_context, const char *cipher_type, const char *hash_type)
int totemip_iface_check(struct totem_ip_address *bindnet, struct totem_ip_address *boundto, int *interface_up, int *interface_num, int mask_high_bit)
struct list_head mcast_send_buf_free
int totemiba_token_send(void *iba_context, const void *ms, unsigned int msg_len)
void(* totemiba_target_set_completed)(void *context)
struct ibv_comp_channel * recv_token_recv_completion_channel
struct sockaddr bind_addr
int totemiba_iface_get(void *iba_context, struct totem_ip_address *addr)
struct totem_ip_address mcast_addr
struct sockaddr recv_token_dest_addr
struct ibv_cq * recv_token_recv_cq
#define LOGSYS_LEVEL_ERROR
struct ibv_cq * mcast_send_cq
struct list_head list_all
struct list_head token_send_buf_head
void(* log_printf)(int level, int subsys, const char *function_name, const char *file_name, int file_line, const char *format,...) __attribute__((format(printf
void totemiba_net_mtu_adjust(void *iba_context, struct totem_config *totem_config)
struct ibv_comp_channel * mcast_send_completion_channel
struct totem_ip_address boundto
struct ibv_pd * send_token_pd
const char * totemiba_iface_print(void *iba_context)
struct ibv_cq * send_token_send_cq
struct rdma_event_channel * mcast_channel
struct rdma_cm_id * listen_recv_token_cma_id
struct rdma_event_channel * listen_recv_token_channel
struct ibv_ah * send_token_ah
int totemiba_finalize(void *iba_context)
struct list_head list_free
qb_loop_t * totemiba_poll_handle
#define COMPLETION_QUEUE_ENTRIES
#define log_printf(level, format, args...)
struct sockaddr mcast_dest_addr
struct sockaddr send_token_dest_addr
struct totem_ip_address my_id
struct ibv_recv_wr recv_wr
struct rdma_cm_id * mcast_cma_id
int totemiba_mcast_noflush_send(void *iba_context, const void *ms, unsigned int msg_len)
struct totem_config * totem_config
struct ibv_pd * recv_token_pd
struct rdma_cm_id * recv_token_cma_id
#define list_entry(ptr, type, member)
struct rdma_cm_id * send_token_cma_id
int totemip_totemip_to_sockaddr_convert(struct totem_ip_address *ip_addr, uint16_t port, struct sockaddr_storage *saddr, int *addrlen)
struct totem_logging_configuration totem_logging_configuration
struct ibv_comp_channel * recv_token_send_completion_channel
int totemiba_processor_count_set(void *iba_context, int processor_count)
int totemiba_recv_flush(void *iba_context)
int totemip_sockaddr_to_totemip_convert(const struct sockaddr_storage *saddr, struct totem_ip_address *ip_addr)
struct totem_ip_address bindnet
void totemiba_buffer_release(void *ptr)