FairMQ: Switch to verbs;ofi_rxm provider

RDM endpoints seem buggy on the pure verbs provider. Switch to the
ofi_rxm RDM emulation layer on top of verbs for now.
This commit is contained in:
Dennis Klein 2018-03-13 02:57:49 +01:00 committed by Mohammad Al-Turany
parent 811fe50a48
commit 697c440aa0
2 changed files with 9 additions and 7 deletions

View File

@ -134,16 +134,16 @@ auto Context::InitOfi(ConnectionType type, Address addr) -> void
// Prepare fi_getinfo query // Prepare fi_getinfo query
unique_ptr<fi_info, void(*)(fi_info*)> ofi_hints(fi_allocinfo(), fi_freeinfo); unique_ptr<fi_info, void(*)(fi_info*)> ofi_hints(fi_allocinfo(), fi_freeinfo);
ofi_hints->caps = FI_MSG | FI_RMA; ofi_hints->caps = FI_MSG;
ofi_hints->mode = FI_CONTEXT; //ofi_hints->mode = FI_CONTEXT;
ofi_hints->addr_format = FI_SOCKADDR_IN; ofi_hints->addr_format = FI_SOCKADDR_IN;
if (addr.Protocol == "tcp") { if (addr.Protocol == "tcp") {
ofi_hints->fabric_attr->prov_name = strdup("sockets"); ofi_hints->fabric_attr->prov_name = strdup("sockets");
} else if (addr.Protocol == "verbs") { } else if (addr.Protocol == "verbs") {
ofi_hints->fabric_attr->prov_name = strdup("verbs"); ofi_hints->fabric_attr->prov_name = strdup("verbs;ofi_rxm");
} }
ofi_hints->ep_attr->type = FI_EP_RDM; ofi_hints->ep_attr->type = FI_EP_RDM;
ofi_hints->domain_attr->mr_mode = ~0; //ofi_hints->domain_attr->mr_mode = FI_MR_BASIC | FI_MR_SCALABLE;
ofi_hints->domain_attr->threading = FI_THREAD_SAFE; ofi_hints->domain_attr->threading = FI_THREAD_SAFE;
ofi_hints->domain_attr->control_progress = FI_PROGRESS_AUTO; ofi_hints->domain_attr->control_progress = FI_PROGRESS_AUTO;
ofi_hints->domain_attr->data_progress = FI_PROGRESS_AUTO; ofi_hints->domain_attr->data_progress = FI_PROGRESS_AUTO;

View File

@ -325,11 +325,13 @@ try {
// Send data // Send data
fi_context ctx; fi_context ctx;
auto ret = fi_send(fDataEndpoint, msg->GetData(), size, nullptr, fRemoteDataAddr, &ctx); auto ret = fi_send(fDataEndpoint, msg->GetData(), size, nullptr, fRemoteDataAddr, &ctx);
if (ret != FI_SUCCESS) if (ret < 0)
throw SocketError(tools::ToString("Failed posting ofi send buffer, reason: ", fi_strerror(ret))); throw SocketError(tools::ToString("Failed posting ofi send buffer, reason: ", fi_strerror(ret)));
}
if (size) {
fi_cq_err_entry cqEntry; fi_cq_err_entry cqEntry;
ret = fi_cq_sread(fDataCompletionQueueTx, &cqEntry, 1, nullptr, -1); auto ret = fi_cq_sread(fDataCompletionQueueTx, &cqEntry, 1, nullptr, -1);
if (ret != 1) if (ret != 1)
throw SocketError(tools::ToString("Failed reading ofi tx completion queue event, reason: ", fi_strerror(ret))); throw SocketError(tools::ToString("Failed reading ofi tx completion queue event, reason: ", fi_strerror(ret)));
} }
@ -371,7 +373,7 @@ try {
auto buf = msg->GetData(); auto buf = msg->GetData();
auto size2 = msg->GetSize(); auto size2 = msg->GetSize();
auto ret = fi_recv(fDataEndpoint, buf, size2, nullptr, fRemoteDataAddr, &ctx); auto ret = fi_recv(fDataEndpoint, buf, size2, nullptr, fRemoteDataAddr, &ctx);
if (ret != FI_SUCCESS) if (ret < 0)
throw SocketError(tools::ToString("Failed posting ofi receive buffer, reason: ", fi_strerror(ret))); throw SocketError(tools::ToString("Failed posting ofi receive buffer, reason: ", fi_strerror(ret)));
// Create and send control message // Create and send control message