shmem: Heartbeats for the monitor.

This commit is contained in:
Alexey Rybalchenko
2017-04-12 11:21:43 +02:00
committed by Mohammad Al-Turany
parent a8d40abbd9
commit 14f9084a80
5 changed files with 142 additions and 14 deletions

View File

@@ -5,10 +5,18 @@
* GNU Lesser General Public Licence version 3 (LGPL) version 3, *
* copied verbatim in the file "LICENSE" *
********************************************************************************/
#include "zmq.h"
#include <zmq.h>
#include <boost/version.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/ipc/message_queue.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/interprocess/sync/scoped_lock.hpp>
#include <chrono>
#include "FairMQLogger.h"
#include "FairMQShmManager.h"
#include "FairMQTransportFactorySHM.h"
@@ -16,11 +24,17 @@
using namespace std;
using namespace FairMQ::shmem;
namespace bipc = boost::interprocess;
FairMQ::Transport FairMQTransportFactorySHM::fTransportType = FairMQ::Transport::SHM;
FairMQTransportFactorySHM::FairMQTransportFactorySHM()
: fContext(nullptr)
, fHeartbeatSocket(nullptr)
, fHeartbeatThread()
, fSendHeartbeats(true)
, fShMutex(bipc::open_or_create, "fairmq_shmem_mutex")
, fDeviceCounter(nullptr)
{
int major, minor, patch;
zmq_version(&major, &minor, &patch);
@@ -51,17 +65,65 @@ void FairMQTransportFactorySHM::Initialize(const FairMQProgOptions* config)
if (zmq_ctx_set(fContext, ZMQ_IO_THREADS, numIoThreads) != 0)
{
LOG(ERROR) << "failed configuring context, reason: " << zmq_strerror(errno);
LOG(ERROR) << "shmem: failed configuring context, reason: " << zmq_strerror(errno);
}
// Set the maximum number of allowed sockets on the context.
if (zmq_ctx_set(fContext, ZMQ_MAX_SOCKETS, 10000) != 0)
{
LOG(ERROR) << "failed configuring context, reason: " << zmq_strerror(errno);
LOG(ERROR) << "shmem: failed configuring context, reason: " << zmq_strerror(errno);
}
fSendHeartbeats = true;
fHeartbeatThread = thread(&FairMQTransportFactorySHM::SendHeartbeats, this);
Manager::Instance().InitializeSegment("open_or_create", "FairMQSharedMemory", segmentSize);
LOG(DEBUG) << "shmem: created/opened shared memory segment of " << segmentSize << " bytes. Available are " << Manager::Instance().Segment()->get_free_memory() << " bytes.";
{ // mutex scope
bipc::scoped_lock<bipc::named_mutex> lock(fShMutex);
pair<FairMQShmDeviceCounter*, size_t> result = Manager::Instance().Segment()->find<FairMQShmDeviceCounter>(bipc::unique_instance);
if (result.first != nullptr)
{
fDeviceCounter = result.first;
LOG(DEBUG) << "shmem: device counter found, with value of " << fDeviceCounter->count << ". incrementing.";
(fDeviceCounter->count)++;
LOG(DEBUG) << "shmem: incremented device counter, now: " << fDeviceCounter->count;
}
else
{
LOG(DEBUG) << "shmem: no device counter found, creating one and initializing with 1";
fDeviceCounter = Manager::Instance().Segment()->construct<FairMQShmDeviceCounter>(bipc::unique_instance)(1);
LOG(DEBUG) << "shmem: initialized device counter with: " << fDeviceCounter->count;
}
}
}
void FairMQTransportFactorySHM::SendHeartbeats()
{
while (fSendHeartbeats)
{
try
{
bipc::message_queue mq(bipc::open_only, "fairmq_shmem_control_queue");
bool heartbeat = true;
boost::posix_time::ptime sndTill = boost::posix_time::microsec_clock::universal_time() + boost::posix_time::milliseconds(100);
if (mq.timed_send(&heartbeat, sizeof(heartbeat), 0, sndTill))
{
this_thread::sleep_for(chrono::milliseconds(100));
}
else
{
LOG(DEBUG) << "control queue timeout";
}
}
catch (bipc::interprocess_exception& ie)
{
this_thread::sleep_for(chrono::milliseconds(500));
// LOG(WARN) << "no fairmq_shmem_control_queue found";
}
}
}
FairMQMessagePtr FairMQTransportFactorySHM::CreateMessage() const
@@ -106,6 +168,9 @@ void FairMQTransportFactorySHM::Shutdown()
{
LOG(ERROR) << "shmem: failed shutting down context, reason: " << zmq_strerror(errno);
}
fSendHeartbeats = false;
fHeartbeatThread.join();
}
void FairMQTransportFactorySHM::Terminate()
@@ -120,23 +185,38 @@ void FairMQTransportFactorySHM::Terminate()
}
else
{
fContext = NULL;
fContext = nullptr;
return;
}
}
}
else
{
LOG(ERROR) << "shmem: Terminate(): context now available for shutdown";
LOG(ERROR) << "shmem: Terminate(): context not available for shutdown";
}
if (boost::interprocess::shared_memory_object::remove("FairMQSharedMemory"))
{
LOG(DEBUG) << "shmem: successfully removed shared memory segment after the device has stopped.";
}
else
{
LOG(DEBUG) << "shmem: did not remove shared memory segment after the device stopped. Already removed?";
{ // mutex scope
bipc::scoped_lock<bipc::named_mutex> lock(fShMutex);
(fDeviceCounter->count)--;
if (fDeviceCounter->count == 0)
{
LOG(DEBUG) << "shmem: last FairMQSharedMemory user, removing segment.";
if (bipc::shared_memory_object::remove("FairMQSharedMemory"))
{
LOG(DEBUG) << "shmem: successfully removed shared memory segment after the device has stopped.";
}
else
{
LOG(DEBUG) << "shmem: did not remove shared memory segment after the device stopped. Already removed?";
}
}
else
{
LOG(DEBUG) << "shmem: other FairMQSharedMemory users present (" << fDeviceCounter->count << "), not removing it.";
}
}
}