iser: enhance connection procedure

This patch is used to fix the following problems in the current connection
method:
1. iscsi_iser_connect() waits until the connection is established or failed,
and may block the caller for a long time.
2. Although there's a cm_thread handles communication events, but in fact it
has no effects after the connection is established.
3. Resources are not released properly after reconnection failed. And once we
try to reconnect again, the resources will leak permanently.
(see iscsi_reconnect()).

This patch eliminate cm_thread and handle communication events in the caller
thread.
Connection procedure:
1. Create a mock fd by eventfd() (or just use old_iscsi->fd while reconnecting),
and assign it to iscsi->fd.
2. Create communication event channel, make it non-blocking and dup the
notifier fd to iscsi->fd.
3. Handle communication events by iscsi_which_events()/iscsi_service() loop
until connection established or falied.
4. If connection is established successfully, dup the notifier fd of completion
queue (CQ) events to iscsi->fd.
5. Handle completion queue (CQ) events by iscsi_which_events()/iscsi_service()
loop.
The entire procedure is non-blocking.

After established, whenever iscsi_service() is called with revents=0 or
queue_pdu() is called with a NOP pdu, communication events will be checked.

When connection failed, iser transport cleanup itself before callbacks.

Signed-off-by: wanghonghao <wanghonghao@bytedance.com>
This commit is contained in:
wanghonghao
2020-04-06 21:07:54 +08:00
parent cdcb35e6c6
commit 2212021747
3 changed files with 141 additions and 131 deletions

View File

@@ -29,8 +29,6 @@
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/ioctl.h>
#include <pthread.h>
#include <semaphore.h>
#ifdef __linux
@@ -72,11 +70,6 @@ enum desc_type {
ISCSI_CONTROL = 0,
ISCSI_COMMAND};
enum conn_state{
CONN_ERROR = 0,
CONN_DISCONNECTED,
CONN_ESTABLISHED};
enum data_dir{
DATA_WRITE = 0,
DATA_READ};
@@ -179,22 +172,19 @@ struct iser_buf_chunk {
struct iser_conn {
struct rdma_cm_id *cma_id;
struct rdma_event_channel *cma_channel;
struct rdma_cm_event *cma_event;
struct ibv_pd *pd;
struct ibv_cq *cq;
struct ibv_qp *qp;
struct ibv_comp_channel *comp_channel;
struct ibv_recv_wr rx_wr[ISER_MIN_POSTED_RX];
int rdma_connect_sent;
sem_t sem_connect;
struct ibv_recv_wr rx_wr[ISER_MIN_POSTED_RX];
struct ibv_mr *login_resp_mr;
unsigned char *login_resp_buf;
pthread_t cmthread;
struct iser_rx_desc *rx_descs;
uint32_t num_rx_descs;
unsigned int rx_desc_head;
@@ -204,8 +194,6 @@ struct iser_conn {
int min_posted_rx;
uint16_t max_cmds;
enum conn_state conn_state;
struct iser_tx_desc *tx_desc;
struct iser_buf_chunk *buf_chunk;
};

View File

@@ -462,6 +462,7 @@ int iscsi_reconnect(struct iscsi_context *iscsi)
for (i = 0; i < iscsi->smalloc_free; i++) {
iscsi_free(iscsi, iscsi->smalloc_ptrs[i]);
}
iscsi_free(iscsi, iscsi->opaque);
tmp_iscsi->old_iscsi = iscsi->old_iscsi;
} else {
tmp_iscsi->old_iscsi = malloc(sizeof(struct iscsi_context));

View File

@@ -21,7 +21,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/prctl.h>
#include <fcntl.h>
#include <stdlib.h>
#include "slist.h"
@@ -33,8 +32,7 @@
#include "iser-private.h"
#include "iscsi-private.h"
#include "scsi-lowlevel.h"
#include <pthread.h>
#include <semaphore.h>
#include <sys/eventfd.h>
#include <poll.h>
@@ -53,9 +51,6 @@
#ifdef __linux
/* the name can be up to 16 bytes long, including the terminating null byte*/
#define ISER_CM_THREAD_NAME "iscsi_cm_thread"
/* MUST keep in sync with socket.c */
union socket_address {
struct sockaddr_in sin;
@@ -65,6 +60,7 @@ union socket_address {
static int cq_handle(struct iser_conn *iser_conn);
static int iscsi_iser_revive_queued_pdus(struct iscsi_context *iscsi);
static int iscsi_iser_cm_event(struct iscsi_context *iscsi);
/*
* iscsi_iser_get_fd() - Return completion queue
@@ -73,8 +69,7 @@ static int iscsi_iser_revive_queued_pdus(struct iscsi_context *iscsi);
static int
iscsi_iser_get_fd(struct iscsi_context *iscsi)
{
struct iser_conn *iser_conn = iscsi->opaque;
return iser_conn->comp_channel->fd;
return iscsi->fd;
}
/*
@@ -108,16 +103,27 @@ iscsi_iser_service(struct iscsi_context *iscsi, int revents)
int ret = 0;
struct iser_conn *iser_conn = iscsi->opaque;
if (iscsi->pending_reconnect) {
if (time(NULL) >= iscsi->next_reconnect) {
return iscsi_reconnect(iscsi);
} else {
if (iscsi->old_iscsi) {
return 0;
}
}
}
if (revents == POLLIN)
ret = cq_handle(iser_conn);
else {
ret = iscsi->is_connected ? cq_handle(iser_conn) : iscsi_iser_cm_event(iscsi);
else if (!revents) {
ret = iscsi_iser_cm_event(iscsi);
} else {
iscsi_set_error(iscsi, "revents is not POLLIN");
return -1;
}
if (ret) {
iscsi_set_error(iscsi, "CQ handle Failed");
return -1;
return iscsi_service_reconnect_if_loggedin(iscsi);
}
return iscsi_iser_revive_queued_pdus(iscsi);
@@ -396,11 +402,6 @@ iser_free_iser_conn_res(struct iser_conn *iser_conn, bool destroy)
if (destroy) {
if (iser_conn->cmthread) {
pthread_join(iser_conn->cmthread, NULL);
iser_conn->cmthread = 0;
}
iser_free_reg_mr(iser_conn);
if (iser_conn->rx_descs) {
@@ -452,7 +453,7 @@ iser_conn_release(struct iser_conn *iser_conn)
int ret;
struct iscsi_context *iscsi = iser_conn->cma_id->context;
iser_free_iser_conn_res(iser_conn,true);
iser_free_iser_conn_res(iser_conn, true);
if (iser_conn->cma_id) {
ret = rdma_destroy_id(iser_conn->cma_id);
@@ -480,10 +481,15 @@ iser_conn_terminate(struct iser_conn *iser_conn)
int ret;
struct iscsi_context *iscsi = iser_conn->cma_id->context;
ret = rdma_disconnect(iser_conn->cma_id);
if (ret)
iscsi_set_error(iscsi, "Failed to disconnect, conn: 0x%p, err %d\n",
iser_conn, ret);
if(iser_conn->rdma_connect_sent) {
ret = rdma_disconnect(iser_conn->cma_id);
iser_conn->rdma_connect_sent = 0;
if (ret)
iscsi_set_error(iscsi, "Failed to disconnect, conn: 0x%p, err %d\n",
iser_conn, ret);
}
return;
}
/*
@@ -496,11 +502,15 @@ iscsi_iser_disconnect(struct iscsi_context *iscsi) {
struct iser_conn *iser_conn = iscsi->opaque;
if (iser_conn) {
if (iser_conn->cma_id) {
iser_conn_terminate(iser_conn);
iser_conn_release(iser_conn);
}
if (iscsi->fd != -1) {
close(iscsi->fd);
}
iscsi->fd = -1;
iscsi->is_connected = 0;
iscsi->is_corked = 0;
@@ -970,6 +980,12 @@ iscsi_iser_queue_pdu(struct iscsi_context *iscsi, struct iscsi_pdu *pdu) {
return -1;
}
if (pdu->outdata.data[0] == ISCSI_PDU_NOP_OUT &&
iscsi_iser_cm_event(iscsi) != 0) {
iscsi_service_reconnect_if_loggedin(iscsi);
return -1;
}
if (iscsi->outqueue != NULL ||
(iscsi_serial32_compare(pdu->cmdsn, iscsi->maxcmdsn) > 0
&& !(pdu->outdata.data[0] & ISCSI_PDU_IMMEDIATE))) {
@@ -1157,6 +1173,7 @@ static int iser_route_handler(struct rdma_cm_id *cma_id) {
iscsi_set_error(iscsi, "conn %p failure connecting: %d", iser_conn, ret);
return -1;
}
iser_conn->rdma_connect_sent = 1;
return ret;
login_mr_error:
@@ -1462,7 +1479,15 @@ static int cq_handle(struct iser_conn *iser_conn)
int ret;
struct iscsi_context *iscsi = iser_conn->cma_id->context;
ibv_get_cq_event(iser_conn->comp_channel, &iser_conn->cq, &ev_ctx);
ret = ibv_get_cq_event(iser_conn->comp_channel, &iser_conn->cq, &ev_ctx);
if (ret) {
if (errno != EAGAIN && errno != EWOULDBLOCK) {
iscsi_set_error(iscsi, "failed get cq event %s", strerror(errno));
return -1;
}
goto out;
}
ret = ibv_req_notify_cq(iser_conn->cq, 0);
/* TODO: aggregate ack cq event for efficiency */
@@ -1472,6 +1497,7 @@ static int cq_handle(struct iser_conn *iser_conn)
return -1;
}
out:
ret = cq_event_handler(iser_conn);
if (ret) {
iscsi_set_error(iscsi, "failed CQ handler");
@@ -1493,7 +1519,12 @@ static int iser_connected_handler(struct rdma_cm_id *cma_id) {
struct iscsi_context *iscsi = cma_id->context;
struct iser_conn *iser_conn = iscsi->opaque;
if (dup2(iser_conn->comp_channel->fd, iscsi->fd) == -1) {
return -1;
}
iser_conn->post_recv_buf_count = 0;
iscsi->is_connected = 1;
return 0;
}
@@ -1507,8 +1538,7 @@ static int iser_connected_handler(struct rdma_cm_id *cma_id) {
*
*/
static int
iser_cma_handler(struct iser_conn *iser_conn,struct rdma_cm_id *cma_id, struct rdma_cm_event *event) {
iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) {
int ret = 0;
switch(event->event) {
@@ -1521,69 +1551,64 @@ iser_cma_handler(struct iser_conn *iser_conn,struct rdma_cm_id *cma_id, struct r
break;
case RDMA_CM_EVENT_ESTABLISHED:
ret = iser_connected_handler(cma_id);
if(ret)
iser_conn->conn_state = CONN_ERROR;
else
iser_conn->conn_state = CONN_ESTABLISHED;
sem_post(&iser_conn->sem_connect);
break;
case RDMA_CM_EVENT_ADDR_ERROR:
case RDMA_CM_EVENT_ROUTE_ERROR:
case RDMA_CM_EVENT_CONNECT_ERROR:
case RDMA_CM_EVENT_UNREACHABLE:
case RDMA_CM_EVENT_REJECTED:
iser_conn->conn_state = CONN_ERROR;
ret = -1;
sem_post(&iser_conn->sem_connect);
break;
case RDMA_CM_EVENT_DISCONNECTED:
case RDMA_CM_EVENT_ADDR_CHANGE:
case RDMA_CM_EVENT_TIMEWAIT_EXIT:
iser_conn->conn_state = CONN_DISCONNECTED;
ret = -1;
sem_post(&iser_conn->sem_connect);
break;
default:
iser_conn->conn_state = CONN_ERROR;
ret = -1;
sem_post(&iser_conn->sem_connect);
break;
}
return ret;
}
/*
* iser_connected_handler() - thread to catch rdma cm events
*
* @arg: ib connection context
*
* Notes:
* Need to check if copying event is necessery
*/
static void *cm_thread(void *arg)
{
struct iser_conn *iser_conn = arg;
struct rdma_cm_event event_copy;
static int
iscsi_iser_cm_event(struct iscsi_context *iscsi) {
struct iser_conn *iser_conn = iscsi->opaque;
struct rdma_event_channel *channel = iser_conn->cma_channel;
struct rdma_cm_event *event;
int ret;
struct iscsi_context *iscsi = iser_conn->cma_id->context;
/* supported since Linux 2.6.9, not fatal error, ignore return value */
prctl(PR_SET_NAME, ISER_CM_THREAD_NAME);
while (rdma_get_cm_event(channel, &event) == 0) {
ret = iser_cma_handler(iser_conn->cma_id, event);
rdma_ack_cm_event(event);
while (1) {
ret = rdma_get_cm_event(iser_conn->cma_channel, &iser_conn->cma_event);
if (ret) {
iscsi_set_error(iscsi, "Failed to get RDMA-CM Event\n");
pthread_exit(NULL);
if (iscsi->socket_status_cb != NULL) {
/* connect failed, cleanup itself */
if (iser_conn->cma_id) {
iser_conn_terminate(iser_conn);
iser_conn_release(iser_conn);
iser_conn->cma_id = NULL;
}
iscsi->socket_status_cb(iscsi, SCSI_STATUS_ERROR, NULL, iscsi->connect_data);
iscsi->socket_status_cb = NULL;
}
return -1;
}
memcpy(&event_copy, iser_conn->cma_event, sizeof(struct rdma_cm_event));
ret = iser_cma_handler(iser_conn, iser_conn->cma_id, &event_copy);
rdma_ack_cm_event(iser_conn->cma_event);
if (ret) {
iscsi_set_error(iscsi, "Failed to handle event\n");
pthread_exit(NULL);
if (iscsi->is_connected && iscsi->socket_status_cb != NULL) {
iscsi->socket_status_cb(iscsi, SCSI_STATUS_GOOD, NULL, iscsi->connect_data);
iscsi->socket_status_cb = NULL;
}
}
if (errno != EAGAIN && errno != EWOULDBLOCK) {
iscsi_set_error(iscsi, "Failed to get RDMA-CM Event\n");
return -1;
}
return 0;
}
/*
@@ -1601,9 +1626,33 @@ static int
iscsi_iser_connect(struct iscsi_context *iscsi, union socket_address *sa,__attribute__((unused)) int ai_family) {
struct iser_conn *iser_conn = iscsi->opaque;
int ret;
int flag;
sem_init(&iser_conn->sem_connect, 0, 0);
if (iscsi->old_iscsi && iscsi->fd != iscsi->old_iscsi->fd) {
struct iscsi_context *old_iscsi = iscsi->old_iscsi;
struct iser_conn *old_iser_conn = old_iscsi->opaque;
iscsi->fd = old_iscsi->fd;
if (old_iser_conn) {
if (old_iser_conn->cma_id) {
iser_conn_terminate(old_iser_conn);
iser_conn_release(old_iser_conn);
}
iscsi_free(old_iscsi, old_iser_conn);
old_iscsi->opaque = NULL;
}
}
/* create mock file descriptor */
if (iscsi->fd == -1) {
iscsi->fd = eventfd(0, EFD_CLOEXEC);
if (iscsi->fd < 0) {
return -1;
}
}
iser_conn->cma_channel = rdma_create_event_channel();
@@ -1612,68 +1661,40 @@ iscsi_iser_connect(struct iscsi_context *iscsi, union socket_address *sa,__attri
return -1;
}
if (rdma_create_id(iser_conn->cma_channel, &iser_conn->cma_id, (void *)iscsi, RDMA_PS_TCP)) {
iscsi_set_error(iscsi, "Failed create channel_id");
return -1;
flag = fcntl(iser_conn->cma_channel->fd, F_GETFL);
if (fcntl(iser_conn->cma_channel->fd, F_SETFL, flag | O_NONBLOCK) < 0) {
iscsi_set_error(iscsi, "Cannot set event channel to non blocking");
goto error;
}
ret = pthread_create(&iser_conn->cmthread, NULL, cm_thread, iser_conn);
if(ret) {
iscsi_set_error(iscsi, "Failed create Connection Manager Thread");
return -1;
if (rdma_create_id(iser_conn->cma_channel, &iser_conn->cma_id, (void *)iscsi, RDMA_PS_TCP)) {
iscsi_set_error(iscsi, "Failed create channel_id");
goto error;
}
if (dup2(iser_conn->cma_channel->fd, iscsi->fd) < 0) {
iscsi_set_error(iscsi, "Failed dup event channel fd");
goto error;
}
if(rdma_resolve_addr(iser_conn->cma_id, NULL, &sa->sa, 2000)) {
iscsi_set_error(iscsi, "Failed resolve address");
return -1;
}
sem_wait(&iser_conn->sem_connect);
switch(iser_conn->conn_state) {
case CONN_ERROR:
iscsi_set_error(iscsi, "Conn Error event");
return -1;
case CONN_DISCONNECTED:
iscsi_set_error(iscsi, "Conn disconnected event");
return -1;
case CONN_ESTABLISHED:
break;
default:
iscsi_set_error(iscsi, "Unknown State of connection");
return -1;
}
iscsi->is_connected = 1;
iscsi->socket_status_cb(iscsi, SCSI_STATUS_GOOD, NULL, iscsi->connect_data);
iscsi->socket_status_cb = NULL;
if (iscsi->old_iscsi && iscsi->opaque != iscsi->old_iscsi->opaque) {
struct iser_conn *old_iser_conn = iscsi->old_iscsi->opaque;
int oldfd = old_iser_conn->comp_channel->fd;
int newfd = iser_conn->comp_channel->fd;
iser_conn_terminate(old_iser_conn);
iser_conn_release(old_iser_conn);
if (dup2(newfd, oldfd) == -1) {
return -1;
}
close(newfd);
iser_conn->comp_channel->fd = oldfd;
iscsi_free(iscsi->old_iscsi, iscsi->old_iscsi->opaque);
iscsi->old_iscsi->opaque = NULL;
goto error;
}
return 0;
error:
if (iser_conn->cma_id) {
rdma_destroy_id(iser_conn->cma_id);
iser_conn->cma_id = NULL;
}
if (iser_conn->cma_channel) {
rdma_destroy_event_channel(iser_conn->cma_channel);
iser_conn->cma_channel = NULL;
}
return -1;
}
static iscsi_transport iscsi_transport_iser = {