Implement control band with asiofi

This commit is contained in:
Dennis Klein 2018-11-26 00:20:41 +01:00 committed by Dennis Klein
parent 672e12f45b
commit b31ab1cc48
6 changed files with 366 additions and 332 deletions

View File

@ -31,12 +31,15 @@ namespace ofi
using namespace std;
Context::Context(FairMQTransportFactory& receiveFactory, int numberIoThreads)
Context::Context(FairMQTransportFactory& sendFactory,
FairMQTransportFactory& receiveFactory,
int numberIoThreads)
: fOfiInfo(nullptr)
, fOfiFabric(nullptr)
, fOfiDomain(nullptr)
, fIoWork(fIoContext)
, fReceiveFactory(receiveFactory)
, fSendFactory(sendFactory)
{
InitThreadPool(numberIoThreads);
}
@ -66,73 +69,27 @@ auto Context::GetAsiofiVersion() const -> string
return ASIOFI_VERSION;
}
auto Context::InitOfi(ConnectionType type, Address addr) -> void
auto Context::InitOfi(Address addr) -> void
{
assert(!fOfiInfo);
assert(!fOfiFabric);
assert(!fOfiDomain);
if (!fOfiInfo) {
assert(!fOfiFabric);
assert(!fOfiDomain);
asiofi::hints hints;
if (addr.Protocol == "tcp") {
hints.set_provider("sockets");
} else if (addr.Protocol == "verbs") {
hints.set_provider("verbs");
asiofi::hints hints;
if (addr.Protocol == "tcp") {
hints.set_provider("sockets");
} else if (addr.Protocol == "verbs") {
hints.set_provider("verbs");
}
fOfiInfo = tools::make_unique<asiofi::info>(addr.Ip.c_str(), std::to_string(addr.Port).c_str(), 0, hints);
// LOG(debug) << "OFI transport: " << *fOfiInfo;
fOfiFabric = tools::make_unique<asiofi::fabric>(*fOfiInfo);
fOfiDomain = tools::make_unique<asiofi::domain>(*fOfiFabric);
}
if (type == ConnectionType::Bind) {
fOfiInfo = tools::make_unique<asiofi::info>(addr.Ip.c_str(), std::to_string(addr.Port).c_str(), FI_SOURCE, hints);
} else {
fOfiInfo = tools::make_unique<asiofi::info>(addr.Ip.c_str(), std::to_string(addr.Port).c_str(), 0, hints);
}
// LOG(debug) << "OFI transport: " << *fOfiInfo;
fOfiFabric = tools::make_unique<asiofi::fabric>(*fOfiInfo);
fOfiDomain = tools::make_unique<asiofi::domain>(*fOfiFabric);
}
auto Context::MakeOfiPassiveEndpoint(Address addr) -> unique_ptr<asiofi::passive_endpoint>
{
InitOfi(ConnectionType::Bind, addr);
return tools::make_unique<asiofi::passive_endpoint>(fIoContext, *fOfiFabric);
}
auto Context::MakeOfiConnectedEndpoint(const asiofi::info& info) -> std::unique_ptr<asiofi::connected_endpoint>
{
assert(fOfiDomain);
return tools::make_unique<asiofi::connected_endpoint>(fIoContext, *fOfiDomain, info);
}
auto Context::MakeOfiConnectedEndpoint(Address addr) -> std::unique_ptr<asiofi::connected_endpoint>
{
InitOfi(ConnectionType::Connect, addr);
return tools::make_unique<asiofi::connected_endpoint>(fIoContext, *fOfiDomain);
}
// auto Context::CreateOfiEndpoint() -> fid_ep*
// {
// assert(fOfiDomain);
// assert(fOfiInfo);
// fid_ep* ep = nullptr;
// fi_context ctx;
// auto ret = fi_endpoint(fOfiDomain, fOfiInfo, &ep, &ctx);
// if (ret != FI_SUCCESS)
// throw ContextError{tools::ToString("Failed creating ofi endpoint, reason: ", fi_strerror(ret))};
//assert(fOfiEventQueue);
//ret = fi_ep_bind(ep, &fOfiEventQueue->fid, 0);
//if (ret != FI_SUCCESS)
// throw ContextError{tools::ToString("Failed binding ofi event queue to ofi endpoint, reason: ", fi_strerror(ret))};
// assert(fOfiAddressVector);
// ret = fi_ep_bind(ep, &fOfiAddressVector->fid, 0);
// if (ret != FI_SUCCESS)
// throw ContextError{tools::ToString("Failed binding ofi address vector to ofi endpoint, reason: ", fi_strerror(ret))};
//
// return ep;
// }
auto Context::ConvertAddress(std::string address) -> Address
{
string protocol, ip;
@ -182,6 +139,11 @@ auto Context::MakeReceiveMessage(size_t size) -> MessagePtr
return fReceiveFactory.CreateMessage(size);
}
auto Context::MakeSendMessage(size_t size) -> MessagePtr
{
return fSendFactory.CreateMessage(size);
}
} /* namespace ofi */
} /* namespace mq */
} /* namespace fair */

View File

@ -12,11 +12,9 @@
#include <FairMQLogger.h>
#include <FairMQTransportFactory.h>
#include <asiofi/connected_endpoint.hpp>
#include <asiofi/domain.hpp>
#include <asiofi/fabric.hpp>
#include <asiofi/info.hpp>
#include <asiofi/passive_endpoint.hpp>
#include <boost/asio/io_context.hpp>
#include <memory>
#include <netinet/in.h>
@ -33,9 +31,6 @@ namespace mq
namespace ofi
{
enum class ConnectionType : bool { Bind, Connect };
enum class Direction : bool { Receive, Transmit };
/**
* @class Context Context.h <fairmq/ofi/Context.h>
* @brief Transport-wide context
@ -45,10 +40,11 @@ enum class Direction : bool { Receive, Transmit };
class Context
{
public:
Context(FairMQTransportFactory& receiveFactory, int numberIoThreads = 1);
Context(FairMQTransportFactory& sendFactory,
FairMQTransportFactory& receiveFactory,
int numberIoThreads = 1);
~Context();
// auto CreateOfiEndpoint() -> fid_ep*;
auto GetAsiofiVersion() const -> std::string;
auto GetIoContext() -> boost::asio::io_context& { return fIoContext; }
struct Address {
@ -57,17 +53,18 @@ class Context
unsigned int Port;
friend auto operator<<(std::ostream& os, const Address& a) -> std::ostream& { return os << a.Protocol << "://" << a.Ip << ":" << a.Port; }
};
auto MakeOfiPassiveEndpoint(Address addr) -> std::unique_ptr<asiofi::passive_endpoint>;
auto MakeOfiConnectedEndpoint(Address addr) -> std::unique_ptr<asiofi::connected_endpoint>;
auto MakeOfiConnectedEndpoint(const asiofi::info& info) -> std::unique_ptr<asiofi::connected_endpoint>;
auto InitOfi(Address address) -> void;
auto GetOfiInfo() const -> const asiofi::info& { return *fOfiInfo; }
auto GetOfiFabric() const -> const asiofi::fabric& { return *fOfiFabric; }
auto GetOfiDomain() const -> const asiofi::domain& { return *fOfiDomain; }
static auto ConvertAddress(std::string address) -> Address;
static auto ConvertAddress(Address address) -> sockaddr_in;
static auto ConvertAddress(sockaddr_in address) -> Address;
static auto VerifyAddress(const std::string& address) -> Address;
auto GetDomain() const -> const asiofi::domain& { return *fOfiDomain; }
auto Interrupt() -> void { LOG(debug) << "OFI transport: Interrupted (NOOP - not implemented)."; }
auto Resume() -> void { LOG(debug) << "OFI transport: Resumed (NOOP - not implemented)."; }
auto MakeReceiveMessage(size_t size) -> MessagePtr;
auto MakeSendMessage(size_t size) -> MessagePtr;
private:
std::unique_ptr<asiofi::info> fOfiInfo;
@ -77,9 +74,9 @@ class Context
boost::asio::io_context::work fIoWork;
std::vector<std::thread> fThreadPool;
FairMQTransportFactory& fReceiveFactory;
FairMQTransportFactory& fSendFactory;
auto InitThreadPool(int numberIoThreads) -> void;
auto InitOfi(ConnectionType type, Address address) -> void;
}; /* class Context */
struct ContextError : std::runtime_error { using std::runtime_error::runtime_error; };

View File

@ -11,6 +11,7 @@
#include <FairMQLogger.h>
#include <boost/asio/buffer.hpp>
#include <boost/container/pmr/memory_resource.hpp>
#include <cstdint>
#include <functional>
#include <memory>
@ -55,6 +56,28 @@ struct PostBuffer : ControlMessage
uint64_t size; // buffer size (size_t)
};
template<typename T>
using unique_ptr = std::unique_ptr<T, std::function<void(T*)>>;
template<typename T, typename... Args>
auto MakeControlMessageWithPmr(boost::container::pmr::memory_resource* pmr, Args&&... args)
-> ofi::unique_ptr<T>
{
void* mem = pmr->allocate(sizeof(T));
T* ctrl = new (mem) T(std::forward<Args>(args)...);
if (std::is_same<T, DataAddressAnnouncement>::value) {
ctrl->type = ControlMessageType::DataAddressAnnouncement;
} else if (std::is_same<T, PostBuffer>::value) {
ctrl->type = ControlMessageType::PostBuffer;
}
return ofi::unique_ptr<T>(ctrl, [=](T* p) {
p->~T();
pmr->deallocate(p, sizeof(T));
});
}
template<typename T, typename... Args>
auto MakeControlMessage(Args&&... args) -> T
{

View File

@ -38,36 +38,31 @@ using namespace std;
Socket::Socket(Context& context, const string& type, const string& name, const string& id /*= ""*/)
: fContext(context)
, fPassiveDataEndpoint(nullptr)
, fPassiveEndpoint(nullptr)
, fDataEndpoint(nullptr)
, fControlEndpoint(nullptr)
, fId(id + "." + name + "." + type)
, fBytesTx(0)
, fBytesRx(0)
, fMessagesTx(0)
, fMessagesRx(0)
, fIoStrand(fContext.GetIoContext())
, fControlEndpoint(fIoStrand.context(), ZMQ_PAIR)
, fSndTimeout(100)
, fRcvTimeout(100)
, fSendQueueWrite(fIoStrand.context(), ZMQ_PUSH)
, fSendQueueRead(fIoStrand.context(), ZMQ_PULL)
, fRecvQueueWrite(fIoStrand.context(), ZMQ_PUSH)
, fRecvQueueRead(fIoStrand.context(), ZMQ_PULL)
, fSendSem(fIoStrand.context(), 100)
, fRecvSem(fIoStrand.context(), 100)
, fSendSem(fIoStrand.context(), 300)
, fRecvSem(fIoStrand.context(), 300)
, fNeedOfiMemoryRegistration(false)
{
if (type != "pair") {
throw SocketError{tools::ToString("Socket type '", type, "' not implemented for ofi transport.")};
} else {
fControlEndpoint.set_option(azmq::socket::identity(fId));
// Tell socket to try and send/receive outstanding messages for <linger> milliseconds before terminating.
// Default value for ZeroMQ is -1, which is to wait forever.
fControlEndpoint.set_option(azmq::socket::linger(1000));
// TODO wire this up with config
azmq::socket::snd_hwm send_max(10);
azmq::socket::rcv_hwm recv_max(10);
azmq::socket::snd_hwm send_max(300);
azmq::socket::rcv_hwm recv_max(300);
fSendQueueRead.set_option(send_max);
fSendQueueRead.set_option(recv_max);
fSendQueueWrite.set_option(send_max);
@ -76,8 +71,6 @@ Socket::Socket(Context& context, const string& type, const string& name, const s
fRecvQueueRead.set_option(recv_max);
fSendQueueWrite.set_option(send_max);
fSendQueueWrite.set_option(recv_max);
fControlEndpoint.set_option(send_max);
fControlEndpoint.set_option(recv_max);
// Setup internal queue
auto hashed_id = std::hash<std::string>()(fId);
@ -94,22 +87,25 @@ Socket::Socket(Context& context, const string& type, const string& name, const s
}
}
auto Socket::Bind(const string& address) -> bool
auto Socket::Bind(const string& addr) -> bool
try {
auto addr = Context::VerifyAddress(address);
fLocalAddr = Context::VerifyAddress(addr);
if (fLocalAddr.Protocol == "verbs") {
fNeedOfiMemoryRegistration = true;
}
BindControlEndpoint(addr);
fContext.InitOfi(fLocalAddr);
fPassiveEndpoint = tools::make_unique<asiofi::passive_endpoint>(fIoStrand.context(), fContext.GetOfiFabric());
fPassiveEndpoint->set_local_address(Context::ConvertAddress(fLocalAddr));
BindControlEndpoint();
// TODO make data port choice more robust
addr.Port += 555;
fLocalDataAddr = addr;
BindDataEndpoint();
boost::asio::post(fIoStrand, std::bind(&Socket::SendQueueReader, this));
boost::asio::post(fIoStrand, std::bind(&Socket::RecvControlQueueReader, this));
return true;
}
// TODO catch the correct ofi error
catch (const SilentSocketError& e)
{
// do not print error in this case, this is handled by FairMQDevice
@ -122,14 +118,55 @@ catch (const SocketError& e)
return false;
}
auto Socket::Connect(const string& address) -> bool
auto Socket::BindControlEndpoint() -> void
{
auto addr = Context::VerifyAddress(address);
fRemoteDataAddr = addr;
assert(!fControlEndpoint);
ConnectControlEndpoint(addr);
fPassiveEndpoint->listen([&](fid_t /*handle*/, asiofi::info&& info) {
LOG(debug) << "OFI transport (" << fId
<< "): control band connection request received. Accepting ...";
fControlEndpoint = tools::make_unique<asiofi::connected_endpoint>(
fIoStrand.context(), fContext.GetOfiDomain(), info);
fControlEndpoint->enable();
fControlEndpoint->accept([&]() {
LOG(debug) << "OFI transport (" << fId << "): control band connection accepted.";
});
});
ReceiveDataAddressAnnouncement();
LOG(debug) << "OFI transport (" << fId << "): control band bound to " << fLocalAddr;
}
auto Socket::BindDataEndpoint() -> void
{
assert(!fDataEndpoint);
fPassiveEndpoint->listen([&](fid_t /*handle*/, asiofi::info&& info) {
LOG(debug) << "OFI transport (" << fId
<< "): data band connection request received. Accepting ...";
fDataEndpoint = tools::make_unique<asiofi::connected_endpoint>(
fIoStrand.context(), fContext.GetOfiDomain(), info);
fDataEndpoint->enable();
fDataEndpoint->accept([&]() {
LOG(debug) << "OFI transport (" << fId << "): data band connection accepted.";
boost::asio::post(fIoStrand, std::bind(&Socket::SendQueueReader, this));
boost::asio::post(fIoStrand, std::bind(&Socket::RecvControlQueueReader, this));
});
});
LOG(debug) << "OFI transport (" << fId << "): data band bound to " << fLocalAddr;
}
auto Socket::Connect(const string& address) -> void
{
fRemoteAddr = Context::VerifyAddress(address);
if (fRemoteAddr.Protocol == "verbs") {
fNeedOfiMemoryRegistration = true;
}
fContext.InitOfi(fRemoteAddr);
ConnectControlEndpoint();
ConnectDataEndpoint();
@ -137,110 +174,101 @@ auto Socket::Connect(const string& address) -> bool
boost::asio::post(fIoStrand, std::bind(&Socket::RecvControlQueueReader, this));
}
auto Socket::BindControlEndpoint(Context::Address address) -> void
auto Socket::ConnectControlEndpoint() -> void
{
auto addr = tools::ToString("tcp://", address.Ip, ":", address.Port);
fControlEndpoint.bind(addr);
// if (zmq_bind(fControlSocket, addr.c_str()) != 0) {
// TODO if (errno == EADDRINUSE) throw SilentSocketError("EADDRINUSE");
// throw SocketError(tools::ToString("Failed binding control socket ", fId, ", reason: ", zmq_strerror(errno)));
// }
LOG(debug) << "OFI transport (" << fId << "): control band bound to " << address;
}
auto Socket::BindDataEndpoint() -> void
{
assert(!fPassiveDataEndpoint);
assert(!fDataEndpoint);
assert(!fControlEndpoint);
std::mutex m;
std::condition_variable cv;
bool completed(false);
fPassiveDataEndpoint = fContext.MakeOfiPassiveEndpoint(fLocalDataAddr);
fPassiveDataEndpoint->listen([&](fid_t /*handle*/, asiofi::info&& info) {
LOG(debug) << "OFI transport (" << fId << "): data band connection request received. Accepting ...";
fDataEndpoint = fContext.MakeOfiConnectedEndpoint(info);
fDataEndpoint->enable();
fDataEndpoint->accept([&]() {
{
std::unique_lock<std::mutex> lk(m);
completed = true;
}
cv.notify_one();
});
fControlEndpoint =
tools::make_unique<asiofi::connected_endpoint>(fIoStrand.context(), fContext.GetOfiDomain());
fControlEndpoint->enable();
fControlEndpoint->connect(Context::ConvertAddress(fRemoteAddr), [&]() {
{
std::unique_lock<std::mutex> lk(m);
completed = true;
}
cv.notify_one();
});
LOG(debug) << "OFI transport (" << fId << "): data band bound to " << fLocalDataAddr;
AnnounceDataAddress();
LOG(debug) << "OFI transport (" << fId << "): control band connection request sent to "
<< fRemoteAddr;
{
std::unique_lock<std::mutex> lk(m);
cv.wait(lk, [&](){ return completed; });
cv.wait(lk, [&]() { return completed; });
}
LOG(debug) << "OFI transport (" << fId << "): data band connection accepted.";
}
auto Socket::ConnectControlEndpoint(Context::Address address) -> void
{
auto addr = tools::ToString("tcp://", address.Ip, ":", address.Port);
fControlEndpoint.connect(addr);
LOG(debug) << "OFI transport (" << fId << "): control band connected to " << address;
LOG(debug) << "OFI transport (" << fId << "): control band connected.";
}
auto Socket::ConnectDataEndpoint() -> void
{
assert(!fDataEndpoint);
fDataEndpoint = fContext.MakeOfiConnectedEndpoint(fRemoteDataAddr);
std::mutex m;
std::condition_variable cv;
bool completed(false);
fDataEndpoint =
tools::make_unique<asiofi::connected_endpoint>(fIoStrand.context(), fContext.GetOfiDomain());
fDataEndpoint->enable();
LOG(debug) << "OFI transport (" << fId << "): local data band address: " << Context::ConvertAddress(fDataEndpoint->get_local_address());
fDataEndpoint->connect([&]() {
LOG(debug) << "OFI transport (" << fId << "): data band connected.";
fDataEndpoint->connect(Context::ConvertAddress(fRemoteAddr), [&]() {
{
std::unique_lock<std::mutex> lk(m);
completed = true;
}
cv.notify_one();
});
LOG(debug) << "OFI transport (" << fId << "): data band connection request sent to "
<< fRemoteAddr;
{
std::unique_lock<std::mutex> lk(m);
cv.wait(lk, [&]() { return completed; });
}
LOG(debug) << "OFI transport (" << fId << "): data band connected.";
}
auto Socket::ReceiveDataAddressAnnouncement() -> void
{
azmq::message ctrl;
auto recv = fControlEndpoint.receive(ctrl);
assert(recv == sizeof(DataAddressAnnouncement)); (void)recv;
auto daa(static_cast<const DataAddressAnnouncement*>(ctrl.data()));
assert(daa->type == ControlMessageType::DataAddressAnnouncement);
sockaddr_in remoteAddr;
remoteAddr.sin_family = AF_INET;
remoteAddr.sin_port = daa->port;
remoteAddr.sin_addr.s_addr = daa->ipv4;
auto addr = Context::ConvertAddress(remoteAddr);
addr.Protocol = fRemoteDataAddr.Protocol;
LOG(debug) << "OFI transport (" << fId << "): Data address announcement of remote endpoint received: " << addr;
fRemoteDataAddr = addr;
}
auto Socket::AnnounceDataAddress() -> void
{
// auto Socket::ReceiveDataAddressAnnouncement() -> void
// {
// azmq::message ctrl;
// auto recv = fControlEndpoint.receive(ctrl);
// assert(recv == sizeof(DataAddressAnnouncement)); (void)recv;
// auto daa(static_cast<const DataAddressAnnouncement*>(ctrl.data()));
// assert(daa->type == ControlMessageType::DataAddressAnnouncement);
//
// sockaddr_in remoteAddr;
// remoteAddr.sin_family = AF_INET;
// remoteAddr.sin_port = daa->port;
// remoteAddr.sin_addr.s_addr = daa->ipv4;
//
// auto addr = Context::ConvertAddress(remoteAddr);
// addr.Protocol = fRemoteDataAddr.Protocol;
// LOG(debug) << "OFI transport (" << fId << "): Data address announcement of remote endpoint received: " << addr;
// fRemoteDataAddr = addr;
// }
//
// auto Socket::AnnounceDataAddress() -> void
// {
// fLocalDataAddr = fDataEndpoint->get_local_address();
// LOG(debug) << "Address of local ofi endpoint in socket " << fId << ": " << Context::ConvertAddress(fLocalDataAddr);
//
// Create new data address announcement message
auto daa = MakeControlMessage<DataAddressAnnouncement>();
auto addr = Context::ConvertAddress(fLocalDataAddr);
daa.ipv4 = addr.sin_addr.s_addr;
daa.port = addr.sin_port;
auto sent = fControlEndpoint.send(boost::asio::buffer(daa));
assert(sent == sizeof(addr)); (void)sent;
LOG(debug) << "OFI transport (" << fId << "): data band address " << fLocalDataAddr << " announced.";
}
// auto daa = MakeControlMessage<DataAddressAnnouncement>();
// auto addr = Context::ConvertAddress(fLocalDataAddr);
// daa.ipv4 = addr.sin_addr.s_addr;
// daa.port = addr.sin_port;
//
// auto sent = fControlEndpoint.send(boost::asio::buffer(daa));
// assert(sent == sizeof(addr)); (void)sent;
//
// LOG(debug) << "OFI transport (" << fId << "): data band address " << fLocalDataAddr << " announced.";
// }
auto Socket::Send(MessagePtr& msg, const int /*timeout*/) -> int
{
@ -265,7 +293,7 @@ auto Socket::Send(MessagePtr& msg, const int /*timeout*/) -> int
auto Socket::Receive(MessagePtr& msg, const int /*timeout*/) -> int
{
LOG(debug) << "OFI transport (" << fId << "): ENTER Receive";
// LOG(debug) << "OFI transport (" << fId << "): ENTER Receive";
try {
azmq::message zmsg;
@ -280,7 +308,7 @@ auto Socket::Receive(MessagePtr& msg, const int /*timeout*/) -> int
fBytesRx += size;
fMessagesRx++;
LOG(debug) << "OFI transport (" << fId << "): LEAVE Receive";
// LOG(debug) << "OFI transport (" << fId << "): LEAVE Receive";
return size;
} catch (const std::exception& e) {
LOG(error) << e.what();
@ -299,7 +327,7 @@ auto Socket::SendQueueReader() -> void
fSendSem.async_wait(
boost::asio::bind_executor(fIoStrand, [&](const boost::system::error_code& ec) {
if (!ec) {
LOG(debug) << "OFI transport (" << fId << "): < Wait fSendSem=" << fSendSem.get_value();
// LOG(debug) << "OFI transport (" << fId << "): < Wait fSendSem=" << fSendSem.get_value();
fSendQueueRead.async_receive([&](const boost::system::error_code& ec2,
azmq::message& zmsg,
size_t bytes_transferred) {
@ -311,7 +339,7 @@ auto Socket::SendQueueReader() -> void
}));
}
auto Socket::OnSend(azmq::message& zmsg, size_t bytes_transferred) -> void
auto Socket::OnSend(azmq::message& zmsg, size_t /*bytes_transferred*/) -> void
{
// LOG(debug) << "OFI transport (" << fId << "): ENTER OnSend: bytes_transferred=" << bytes_transferred;
@ -321,67 +349,72 @@ auto Socket::OnSend(azmq::message& zmsg, size_t bytes_transferred) -> void
// LOG(debug) << "OFI transport (" << fId << "): OnSend: data=" << msg->GetData() << ",size=" << msg->GetSize();
// Create and send control message
auto pb = MakeControlMessage<PostBuffer>();
pb.size = size;
fControlEndpoint.async_send(
azmq::message(boost::asio::buffer(pb)),
[&, msg2 = std::move(msg)](const boost::system::error_code& ec, size_t bytes_transferred2) mutable {
if (!ec) {
OnControlMessageSent(bytes_transferred2, std::move(msg2));
}
});
// LOG(debug) << "OFI transport (" << fId << "): LEAVE OnSend";
}
auto Socket::OnControlMessageSent(size_t bytes_transferred, MessagePtr msg) -> void
{
// LOG(debug) << "OFI transport (" << fId
// << "): ENTER OnControlMessageSent: bytes_transferred=" << bytes_transferred
// << ",data=" << msg->GetData() << ",size=" << msg->GetSize();
assert(bytes_transferred == sizeof(PostBuffer));
auto size = msg->GetSize();
if (size) {
// Receive ack
// azmq::message ctrl;
// auto recv = fControlEndpoint.receive(ctrl);
// assert(recv == sizeof(PostBuffer));
// (void)recv;
// auto ack(static_cast<const PostBuffer*>(ctrl.data()));
// assert(ack->type == ControlMessageType::PostBuffer);
// (void)ack;
// LOG(debug) << "OFI transport (" << fId << "): >>>>> SendImpl: Control ack
// received, size_ack=" << size_ack;
boost::asio::mutable_buffer buffer(msg->GetData(), size);
// asiofi::memory_region mr(fContext.GetDomain(), buffer, asiofi::mr::access::send);
// auto desc = mr.desc();
fDataEndpoint->send(
buffer,
// desc,
[&, size, msg2 = std::move(msg)/*, mr2 = std::move(mr)*/](boost::asio::mutable_buffer) mutable {
// LOG(debug) << "OFI transport (" << fId << "): >>>>> Data buffer sent";
fBytesTx += size;
fMessagesTx++;
fSendSem.async_signal([&](const boost::system::error_code& ec){
if (!ec) {
LOG(debug) << "OFI transport (" << fId << "): > Signal fSendSem=" << fSendSem.get_value();
}
});
auto ctrl = MakeControlMessageWithPmr<PostBuffer>(&fControlMemPool);
ctrl->size = size;
auto ctrl_msg = boost::asio::mutable_buffer(ctrl.get(), sizeof(PostBuffer));
if (fNeedOfiMemoryRegistration) {
asiofi::memory_region mr(fContext.GetOfiDomain(), ctrl_msg, asiofi::mr::access::send);
auto desc = mr.desc();
fControlEndpoint->send(
ctrl_msg, desc, [&, ctrl2 = std::move(ctrl)](boost::asio::mutable_buffer) mutable {
// LOG(debug) << "OFI transport (" << fId << "): >>>>> Control message sent";
});
} else {
fControlEndpoint->send(ctrl_msg,
[&, ctrl2 = std::move(ctrl)](boost::asio::mutable_buffer) mutable {
// LOG(debug) << "OFI transport (" << fId << "): >>>>> Control
// message sent";
});
}
if (size) {
boost::asio::mutable_buffer buffer(msg->GetData(), size);
if (fNeedOfiMemoryRegistration) {
asiofi::memory_region mr(fContext.GetOfiDomain(), buffer, asiofi::mr::access::send);
auto desc = mr.desc();
fDataEndpoint->send(buffer,
desc,
[&, size, msg2 = std::move(msg), mr2 = std::move(mr)](
boost::asio::mutable_buffer) mutable {
// LOG(debug) << "OFI transport (" << fId << "): >>>>> Data
// buffer sent";
fBytesTx += size;
fMessagesTx++;
fSendSem.async_signal([&](const boost::system::error_code& ec) {
if (!ec) {
// LOG(debug) << "OFI transport (" << fId << "): >
// Signal fSendSem=" << fSendSem.get_value();
}
});
});
} else {
fDataEndpoint->send(
buffer, [&, size, msg2 = std::move(msg)](boost::asio::mutable_buffer) mutable {
// LOG(debug) << "OFI transport (" << fId << "): >>>>> Data buffer sent";
fBytesTx += size;
fMessagesTx++;
fSendSem.async_signal([&](const boost::system::error_code& ec) {
if (!ec) {
// LOG(debug) << "OFI transport (" << fId << "): > Signal fSendSem="
// << fSendSem.get_value();
}
});
});
}
} else {
++fMessagesTx;
fSendSem.async_signal([&](const boost::system::error_code& ec) {
if (!ec) {
LOG(debug) << "OFI transport (" << fId << "): > Signal fSendSem=" << fSendSem.get_value();
// LOG(debug) << "OFI transport (" << fId << "): > Signal fSendSem=" << fSendSem.get_value();
}
});
}
boost::asio::post(fIoStrand, std::bind(&Socket::SendQueueReader, this));
// LOG(debug) << "OFI transport (" << fId << "): LEAVE OnControlMessageSent";
// LOG(debug) << "OFI transport (" << fId << "): LEAVE OnSend";
}
auto Socket::RecvControlQueueReader() -> void
@ -389,77 +422,89 @@ auto Socket::RecvControlQueueReader() -> void
fRecvSem.async_wait(
boost::asio::bind_executor(fIoStrand, [&](const boost::system::error_code& ec) {
if (!ec) {
fControlEndpoint.async_receive([&](const boost::system::error_code& ec2,
azmq::message& zmsg,
size_t bytes_transferred) {
if (!ec2) {
OnRecvControl(zmsg, bytes_transferred);
}
auto ctrl = MakeControlMessageWithPmr<PostBuffer>(&fControlMemPool);
auto ctrl_msg = boost::asio::mutable_buffer(ctrl.get(), sizeof(PostBuffer));
fControlEndpoint->recv(ctrl_msg, [&, ctrl2 = std::move(ctrl)](boost::asio::mutable_buffer) mutable {
OnRecvControl(std::move(ctrl2));
});
}
}));
}
auto Socket::OnRecvControl(azmq::message& zmsg, size_t bytes_transferred) -> void
auto Socket::OnRecvControl(ofi::unique_ptr<PostBuffer> ctrl) -> void
{
LOG(debug) << "OFI transport (" << fId
<< "): ENTER OnRecvControl: bytes_transferred=" << bytes_transferred;
// LOG(debug) << "OFI transport (" << fId << "): ENTER OnRecvControl";
assert(bytes_transferred == sizeof(PostBuffer));
auto pb(static_cast<const PostBuffer*>(zmsg.data()));
assert(pb->type == ControlMessageType::PostBuffer);
auto size = pb->size;
LOG(debug) << "OFI transport (" << fId << "): OnRecvControl: PostBuffer.size=" << size;
auto size = ctrl->size;
// LOG(debug) << "OFI transport (" << fId << "): OnRecvControl: PostBuffer.size=" << size;
// Receive data
if (size) {
auto msg = fContext.MakeReceiveMessage(size);
boost::asio::mutable_buffer buffer(msg->GetData(), size);
// asiofi::memory_region mr(fContext.GetDomain(), buffer, asiofi::mr::access::recv);
// auto msg33 = fContext.MakeReceiveMessage(size);
// boost::asio::mutable_buffer buffer33(msg33->GetData(), size);
// asiofi::memory_region mr33(fContext.GetDomain(), buffer33, asiofi::mr::access::recv);
// auto desc = mr.desc();
fDataEndpoint->recv(
buffer,
// desc,
[&, msg2 = std::move(msg)/*, mr2 = std::move(mr)*/](boost::asio::mutable_buffer) mutable {
MessagePtr* msgptr(new std::unique_ptr<Message>(std::move(msg2)));
fRecvQueueWrite.async_send(
azmq::message(boost::asio::const_buffer(msgptr, sizeof(MessagePtr))),
[&](const boost::system::error_code& ec, size_t bytes_transferred2) {
if (!ec) {
LOG(debug) << "OFI transport (" << fId
<< "): <<<<< Data buffer received, bytes_transferred2="
<< bytes_transferred2;
fRecvSem.async_signal([&](const boost::system::error_code& ec2) {
if (!ec2) {
LOG(debug)
<< "OFI transport (" << fId << "): < Signal fRecvSem";
}
});
}
});
});
// LOG(debug) << "OFI transport (" << fId << "): <<<<< ReceiveImpl: Data buffer posted";
if (fNeedOfiMemoryRegistration) {
asiofi::memory_region mr(fContext.GetOfiDomain(), buffer, asiofi::mr::access::recv);
auto desc = mr.desc();
// auto ack = MakeControlMessage<PostBuffer>();
// ack.size = size;
// auto sent = fControlEndpoint.send(boost::asio::buffer(ack));
// assert(sent == sizeof(PostBuffer)); (void)sent;
// LOG(debug) << "OFI transport (" << fId << "): <<<<< ReceiveImpl: Control Ack sent";
fDataEndpoint->recv(
buffer,
desc,
[&, msg2 = std::move(msg), mr2 = std::move(mr)](
boost::asio::mutable_buffer) mutable {
MessagePtr* msgptr(new std::unique_ptr<Message>(std::move(msg2)));
fRecvQueueWrite.async_send(
azmq::message(boost::asio::const_buffer(msgptr, sizeof(MessagePtr))),
[&](const boost::system::error_code& ec, size_t /*bytes_transferred2*/) {
if (!ec) {
// LOG(debug) << "OFI transport (" << fId
// << "): <<<<< Data buffer received, bytes_transferred2="
// << bytes_transferred2;
fRecvSem.async_signal([&](const boost::system::error_code& ec2) {
if (!ec2) {
// LOG(debug)
// << "OFI transport (" << fId << "): < Signal
// fRecvSem";
}
});
}
});
});
} else {
fDataEndpoint->recv(
buffer, [&, msg2 = std::move(msg)](boost::asio::mutable_buffer) mutable {
MessagePtr* msgptr(new std::unique_ptr<Message>(std::move(msg2)));
fRecvQueueWrite.async_send(
azmq::message(boost::asio::const_buffer(msgptr, sizeof(MessagePtr))),
[&](const boost::system::error_code& ec, size_t /*bytes_transferred2*/) {
if (!ec) {
// LOG(debug) << "OFI transport (" << fId
// << "): <<<<< Data buffer received, bytes_transferred2="
// << bytes_transferred2;
fRecvSem.async_signal([&](const boost::system::error_code& ec2) {
if (!ec2) {
// LOG(debug)
// << "OFI transport (" << fId << "): < Signal
// fRecvSem";
}
});
}
});
});
}
} else {
fRecvQueueWrite.async_send(
azmq::message(boost::asio::const_buffer(nullptr, 0)),
[&](const boost::system::error_code& ec, size_t bytes_transferred2) {
[&](const boost::system::error_code& ec, size_t /*bytes_transferred2*/) {
if (!ec) {
LOG(debug) << "OFI transport (" << fId
<< "): <<<<< Data buffer received, bytes_transferred2="
<< bytes_transferred2;
// LOG(debug) << "OFI transport (" << fId
// << "): <<<<< Data buffer received, bytes_transferred2="
// << bytes_transferred2;
fRecvSem.async_signal([&](const boost::system::error_code& ec2) {
if (!ec2) {
LOG(debug) << "OFI transport (" << fId << "): < Signal fRecvSem";
// LOG(debug) << "OFI transport (" << fId << "): < Signal fRecvSem";
}
});
}
@ -468,7 +513,7 @@ auto Socket::OnRecvControl(azmq::message& zmsg, size_t bytes_transferred) -> voi
boost::asio::post(fIoStrand, std::bind(&Socket::RecvControlQueueReader, this));
LOG(debug) << "OFI transport (" << fId << "): LEAVE OnRecvControl";
// LOG(debug) << "OFI transport (" << fId << "): LEAVE OnRecvControl";
}
auto Socket::SendImpl(vector<FairMQMessagePtr>& /*msgVec*/, const int /*flags*/, const int /*timeout*/) -> int64_t
@ -658,69 +703,74 @@ auto Socket::GetOption(const string& /*option*/, void* /*value*/, size_t* /*valu
// }
}
void Socket::SetLinger(const int value)
void Socket::SetLinger(const int /*value*/)
{
azmq::socket::linger opt(value);
fControlEndpoint.set_option(opt);
// azmq::socket::linger opt(value);
// fControlEndpoint.set_option(opt);
}
int Socket::GetLinger() const
{
azmq::socket::linger opt(0);
fControlEndpoint.get_option(opt);
return opt.value();
// azmq::socket::linger opt(0);
// fControlEndpoint.get_option(opt);
// return opt.value();
return 0;
}
void Socket::SetSndBufSize(const int value)
void Socket::SetSndBufSize(const int /*value*/)
{
azmq::socket::snd_hwm opt(value);
fControlEndpoint.set_option(opt);
// azmq::socket::snd_hwm opt(value);
// fControlEndpoint.set_option(opt);
}
int Socket::GetSndBufSize() const
{
azmq::socket::snd_hwm opt(0);
fControlEndpoint.get_option(opt);
return opt.value();
// azmq::socket::snd_hwm opt(0);
// fControlEndpoint.get_option(opt);
// return opt.value();
return 0;
}
void Socket::SetRcvBufSize(const int value)
void Socket::SetRcvBufSize(const int /*value*/)
{
azmq::socket::rcv_hwm opt(value);
fControlEndpoint.set_option(opt);
// azmq::socket::rcv_hwm opt(value);
// fControlEndpoint.set_option(opt);
}
int Socket::GetRcvBufSize() const
{
azmq::socket::rcv_hwm opt(0);
fControlEndpoint.get_option(opt);
return opt.value();
// azmq::socket::rcv_hwm opt(0);
// fControlEndpoint.get_option(opt);
// return opt.value();
return 0;
}
void Socket::SetSndKernelSize(const int value)
void Socket::SetSndKernelSize(const int /*value*/)
{
azmq::socket::snd_buf opt(value);
fControlEndpoint.set_option(opt);
// azmq::socket::snd_buf opt(value);
// fControlEndpoint.set_option(opt);
}
int Socket::GetSndKernelSize() const
{
azmq::socket::snd_buf opt(0);
fControlEndpoint.get_option(opt);
return opt.value();
// azmq::socket::snd_buf opt(0);
// fControlEndpoint.get_option(opt);
// return opt.value();
return 0;
}
void Socket::SetRcvKernelSize(const int value)
void Socket::SetRcvKernelSize(const int /*value*/)
{
azmq::socket::rcv_buf opt(value);
fControlEndpoint.set_option(opt);
// azmq::socket::rcv_buf opt(value);
// fControlEndpoint.set_option(opt);
}
int Socket::GetRcvKernelSize() const
{
azmq::socket::rcv_buf opt(0);
fControlEndpoint.get_option(opt);
return opt.value();
// azmq::socket::rcv_buf opt(0);
// fControlEndpoint.get_option(opt);
// return opt.value();
return 0;
}
auto Socket::GetConstant(const string& constant) -> int

View File

@ -15,6 +15,8 @@
#include <fairmq/ofi/ControlMessages.h>
#include <asiofi/connected_endpoint.hpp>
#include <asiofi/memory_resources.hpp>
#include <asiofi/passive_endpoint.hpp>
#include <asiofi/semaphore.hpp>
#include <azmq/socket.hpp>
#include <boost/asio.hpp>
@ -51,7 +53,7 @@ class Socket final : public fair::mq::Socket
auto Send(std::vector<MessagePtr>& msgVec, int timeout = 0) -> int64_t override;
auto Receive(std::vector<MessagePtr>& msgVec, int timeout = 0) -> int64_t override;
auto GetSocket() const -> void* { return fControlEndpoint.native_handle(); }
auto GetSocket() const -> void* { return nullptr; }
void SetLinger(const int value) override;
int GetLinger() const override;
@ -80,40 +82,40 @@ class Socket final : public fair::mq::Socket
private:
Context& fContext;
std::unique_ptr<asiofi::passive_endpoint> fPassiveDataEndpoint;
std::unique_ptr<asiofi::connected_endpoint> fDataEndpoint;
std::unique_ptr<asiofi::passive_endpoint> fPassiveEndpoint;
std::unique_ptr<asiofi::connected_endpoint> fDataEndpoint, fControlEndpoint;
std::string fId;
std::atomic<unsigned long> fBytesTx;
std::atomic<unsigned long> fBytesRx;
std::atomic<unsigned long> fMessagesTx;
std::atomic<unsigned long> fMessagesRx;
Context::Address fRemoteDataAddr;
Context::Address fLocalDataAddr;
Context::Address fRemoteAddr;
Context::Address fLocalAddr;
boost::asio::io_service::strand fIoStrand;
mutable azmq::socket fControlEndpoint;
int fSndTimeout;
int fRcvTimeout;
azmq::socket fSendQueueWrite, fSendQueueRead;
azmq::socket fRecvQueueWrite, fRecvQueueRead;
asiofi::semaphore fSendSem, fRecvSem;
asiofi::allocated_pool_resource fControlMemPool;
std::atomic<bool> fNeedOfiMemoryRegistration;
auto SendQueueReader() -> void;
auto OnSend(azmq::message& msg, size_t bytes_transferred) -> void;
auto OnControlMessageSent(size_t bytes_transferred, MessagePtr msg) -> void;
auto RecvControlQueueReader() -> void;
auto OnRecvControl(azmq::message& msg, size_t bytes_transferred) -> void;
auto OnRecvControl(ofi::unique_ptr<PostBuffer> ctrl) -> void;
auto OnReceive() -> void;
auto ReceiveImpl(MessagePtr& msg, const int flags, const int timeout) -> int;
auto SendImpl(std::vector<MessagePtr>& msgVec, const int flags, const int timeout) -> int64_t;
auto ReceiveImpl(std::vector<MessagePtr>& msgVec, const int flags, const int timeout) -> int64_t;
// auto WaitForControlPeer() -> void;
auto AnnounceDataAddress() -> void;
auto ConnectControlEndpoint(Context::Address address) -> void;
auto BindControlEndpoint(Context::Address address) -> void;
// auto AnnounceDataAddress() -> void;
auto BindControlEndpoint() -> void;
auto BindDataEndpoint() -> void;
auto ConnectControlEndpoint() -> void;
auto ConnectDataEndpoint() -> void;
auto ReceiveDataAddressAnnouncement() -> void;
// auto ReceiveDataAddressAnnouncement() -> void;
}; /* class Socket */
struct SilentSocketError : SocketError { using SocketError::SocketError; };

View File

@ -25,7 +25,7 @@ using namespace std;
TransportFactory::TransportFactory(const string& id, const FairMQProgOptions* /*config*/)
try : FairMQTransportFactory(id)
, fContext(*this, 1)
, fContext(*this, *this, 1)
{
LOG(debug) << "OFI transport: Using AZMQ & "
<< "asiofi (" << fContext.GetAsiofiVersion() << ")";