FairMQRegion and examples/advanced/Region

This commit is contained in:
Alexey Rybalchenko
2017-06-02 13:50:13 +02:00
committed by Mohammad Al-Turany
parent 65f1b96dc3
commit 221d2567f0
32 changed files with 843 additions and 156 deletions

View File

@@ -9,11 +9,15 @@
#include <cstdlib>
#include "FairMQMessageSHM.h"
#include "FairMQRegionSHM.h"
#include "FairMQLogger.h"
#include "FairMQShmCommon.h"
using namespace std;
using namespace fair::mq::shmem;
namespace bipc = boost::interprocess;
// uint64_t FairMQMessageSHM::fMessageID = 0;
// string FairMQMessageSHM::fDeviceID = string();
atomic<bool> FairMQMessageSHM::fInterrupted(false);
@@ -25,9 +29,11 @@ FairMQMessageSHM::FairMQMessageSHM()
// , fReceiving(false)
, fQueued(false)
, fMetaCreated(false)
, fRegionId(0)
, fHandle()
, fChunkSize(0)
, fSize(0)
, fLocalPtr(nullptr)
, fRemoteRegion(nullptr)
{
if (zmq_msg_init(&fMessage) != 0)
{
@@ -47,9 +53,11 @@ FairMQMessageSHM::FairMQMessageSHM(const size_t size)
// , fReceiving(false)
, fQueued(false)
, fMetaCreated(false)
, fRegionId(0)
, fHandle()
, fChunkSize(0)
, fSize(0)
, fLocalPtr(nullptr)
, fRemoteRegion(nullptr)
{
InitializeChunk(size);
}
@@ -60,9 +68,11 @@ FairMQMessageSHM::FairMQMessageSHM(void* data, const size_t size, fairmq_free_fn
// , fReceiving(false)
, fQueued(false)
, fMetaCreated(false)
, fRegionId(0)
, fHandle()
, fChunkSize(0)
, fSize(0)
, fLocalPtr(nullptr)
, fRemoteRegion(nullptr)
{
if (InitializeChunk(size))
{
@@ -78,6 +88,35 @@ FairMQMessageSHM::FairMQMessageSHM(void* data, const size_t size, fairmq_free_fn
}
}
// Creates a message that refers to `size` bytes at `data` inside an existing
// region. No chunk is allocated: the payload is identified by the id of the
// region and the byte offset of `data` from the start of that region, and
// both are written into the meta header carried by the ZeroMQ message.
// assumes `region` is a FairMQRegionSHM and `data` points into it -- neither is checked here
FairMQMessageSHM::FairMQMessageSHM(FairMQRegionPtr& region, void* data, const size_t size)
    : fMessage()
    // , fOwner(nullptr)
    // , fReceiving(false)
    , fQueued(false)
    , fMetaCreated(false)
    , fRegionId(static_cast<FairMQRegionSHM*>(region.get())->fRegionId)
    , fHandle()
    , fSize(size)
    , fLocalPtr(data)
    , fRemoteRegion(nullptr)
{
    // For region messages the "handle" is the offset of the payload from the region base.
    const char* const payload = reinterpret_cast<const char*>(data);
    const char* const regionBase = reinterpret_cast<const char*>(region->GetData());
    fHandle = static_cast<bipc::managed_shared_memory::handle_t>(payload - regionBase);

    if (zmq_msg_init_size(&fMessage, sizeof(MetaHeader)) == 0)
    {
        // Construct the header in place inside the ZeroMQ message buffer.
        MetaHeader* header = new(zmq_msg_data(&fMessage)) MetaHeader();
        header->fSize = size;
        header->fHandle = fHandle;
        header->fRegionId = fRegionId;
        fMetaCreated = true;
    }
    else
    {
        LOG(ERROR) << "failed initializing meta message, reason: " << zmq_strerror(errno);
    }
}
bool FairMQMessageSHM::InitializeChunk(const size_t size)
{
// string chunkID = fDeviceID + "c" + to_string(fMessageID);
@@ -109,7 +148,7 @@ bool FairMQMessageSHM::InitializeChunk(const size_t size)
fHandle = Manager::Instance().Segment()->get_handle_from_address(fLocalPtr);
}
fChunkSize = size;
fSize = size;
if (zmq_msg_init_size(&fMessage, sizeof(MetaHeader)) != 0)
{
@@ -119,6 +158,7 @@ bool FairMQMessageSHM::InitializeChunk(const size_t size)
MetaHeader* metaPtr = new(zmq_msg_data(&fMessage)) MetaHeader();
metaPtr->fSize = size;
metaPtr->fHandle = fHandle;
metaPtr->fRegionId = fRegionId;
// if (zmq_msg_init_data(&fMessage, const_cast<char*>(ownerID->c_str()), ownerID->length(), StringDeleter, ownerID) != 0)
// {
@@ -187,14 +227,21 @@ void* FairMQMessageSHM::GetData()
{
return fLocalPtr;
}
else if (fHandle)
{
return Manager::Instance().Segment()->get_address_from_handle(fHandle);
}
else
{
// LOG(ERROR) << "Trying to get data of an empty shared memory message";
return nullptr;
if (fRegionId == 0)
{
return Manager::Instance().Segment()->get_address_from_handle(fHandle);
}
else
{
if (!fRemoteRegion)
{
fRemoteRegion = FairMQRegionPtr(new FairMQRegionSHM(fRegionId, true));
}
fLocalPtr = reinterpret_cast<char*>(fRemoteRegion->GetData()) + fHandle;
return fLocalPtr;
}
}
// if (fOwner)
@@ -210,7 +257,7 @@ void* FairMQMessageSHM::GetData()
size_t FairMQMessageSHM::GetSize()
{
return fChunkSize;
return fSize;
// if (fOwner)
// {
// return fOwner->fPtr->GetSize();
@@ -324,7 +371,7 @@ void FairMQMessageSHM::CloseMessage()
// }
// else
// {
if (fHandle && !fQueued)
if (fHandle && !fQueued && fRegionId == 0)
{
// LOG(WARN) << "Destroying unsent message";
// Manager::Instance().Segment()->destroy_ptr(fHandle);

View File

@@ -8,13 +8,17 @@
#ifndef FAIRMQMESSAGESHM_H_
#define FAIRMQMESSAGESHM_H_
#include <cstddef>
#include <cstddef> // size_t
#include <string>
#include <atomic>
#include <zmq.h>
#include <boost/interprocess/shared_memory_object.hpp>
#include <boost/interprocess/mapped_region.hpp>
#include "FairMQMessage.h"
#include "FairMQRegion.h"
#include "FairMQShmManager.h"
class FairMQMessageSHM : public FairMQMessage
@@ -25,6 +29,8 @@ class FairMQMessageSHM : public FairMQMessage
FairMQMessageSHM();
FairMQMessageSHM(const size_t size);
FairMQMessageSHM(void* data, const size_t size, fairmq_free_fn* ffn, void* hint = nullptr);
FairMQMessageSHM(FairMQRegionPtr& region, void* data, const size_t size);
FairMQMessageSHM(const FairMQMessageSHM&) = delete;
FairMQMessageSHM operator=(const FairMQMessageSHM&) = delete;
@@ -62,9 +68,11 @@ class FairMQMessageSHM : public FairMQMessage
bool fMetaCreated;
static std::atomic<bool> fInterrupted;
static FairMQ::Transport fTransportType;
uint64_t fRegionId;
bipc::managed_shared_memory::handle_t fHandle;
size_t fChunkSize;
size_t fSize;
void* fLocalPtr;
FairMQRegionPtr fRemoteRegion;
};
#endif /* FAIRMQMESSAGESHM_H_ */

View File

@@ -0,0 +1,97 @@
/********************************************************************************
* Copyright (C) 2014 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
* *
* This software is distributed under the terms of the *
* GNU Lesser General Public Licence version 3 (LGPL) version 3, *
* copied verbatim in the file "LICENSE" *
********************************************************************************/
#include "FairMQRegionSHM.h"
#include "FairMQShmManager.h"
#include "FairMQShmCommon.h"
using namespace std;
using namespace fair::mq::shmem;
namespace bipc = boost::interprocess;
atomic<bool> FairMQRegionSHM::fInterrupted(false);
// Creates a new shared memory region of `size` bytes and maps it read/write.
// The region id is taken from the RegionCounter kept in the management
// segment; the shared memory object is named "fairmq_shmem_region_<id>".
// On any interprocess error the failure is logged and the process exits.
FairMQRegionSHM::FairMQRegionSHM(const size_t size)
    : fShmemObject()
    , fRegion()
    , fRegionId(0)
    , fRegionIdStr()
    , fRemote(false)
{
    try
    {
        RegionCounter* rc = Manager::Instance().ManagementSegment().find<RegionCounter>(bipc::unique_instance).first;
        if (rc)
        {
            LOG(DEBUG) << "shmem: region counter found, with value of " << rc->fCount << ". incrementing.";
            // Use the value returned by the atomic increment as the id. Re-reading the
            // counter afterwards could observe an increment made by another process
            // and hand the same id to two regions.
            fRegionId = ++(rc->fCount);
            LOG(DEBUG) << "shmem: incremented region counter, now: " << fRegionId;
        }
        else
        {
            LOG(DEBUG) << "shmem: no region counter found, creating one and initializing with 1";
            rc = Manager::Instance().ManagementSegment().construct<RegionCounter>(bipc::unique_instance)(1);
            // This process created the counter with value 1, so id 1 belongs to it
            // regardless of later increments by others.
            // NOTE(review): find followed by construct is not one atomic step; a second
            // process racing here ends up in the catch block below -- confirm acceptable.
            fRegionId = 1;
            LOG(DEBUG) << "shmem: initialized region counter with: " << fRegionId;
        }

        fRegionIdStr = "fairmq_shmem_region_" + std::to_string(fRegionId);

        fShmemObject = unique_ptr<bipc::shared_memory_object>(new bipc::shared_memory_object(bipc::create_only, fRegionIdStr.c_str(), bipc::read_write));
        fShmemObject->truncate(size);

        fRegion = unique_ptr<bipc::mapped_region>(new bipc::mapped_region(*fShmemObject, bipc::read_write)); // TODO: add HUGEPAGES flag here
    }
    catch (bipc::interprocess_exception& e)
    {
        LOG(ERROR) << "shmem: cannot create region. Already created/not cleaned up?";
        LOG(ERROR) << e.what();
        exit(EXIT_FAILURE);
    }
}
// Opens an already existing region identified by `id` and maps it read/write.
// `remote` is stored in fRemote; the destructor removes the shared memory
// object only when it is false. On failure the error is logged and the
// process exits.
FairMQRegionSHM::FairMQRegionSHM(const uint64_t id, bool remote)
    : fShmemObject()
    , fRegion()
    , fRegionId(id)
    , fRegionIdStr()
    , fRemote(remote)
{
    try
    {
        fRegionIdStr = "fairmq_shmem_region_" + std::to_string(fRegionId);

        fShmemObject.reset(new bipc::shared_memory_object(bipc::open_only, fRegionIdStr.c_str(), bipc::read_write));
        fRegion.reset(new bipc::mapped_region(*fShmemObject, bipc::read_write)); // TODO: add HUGEPAGES flag here
    }
    catch (bipc::interprocess_exception& e)
    {
        LOG(ERROR) << "shmem: cannot open region. Already closed?";
        LOG(ERROR) << e.what();
        exit(EXIT_FAILURE);
    }
}
// Returns the address at which the region is mapped in this process.
void* FairMQRegionSHM::GetData() const
{
    void* const mappedAddress = fRegion->get_address();
    return mappedAddress;
}
// Returns the size of the mapped region as reported by the mapping.
size_t FairMQRegionSHM::GetSize() const
{
    const size_t mappedSize = fRegion->get_size();
    return mappedSize;
}
// Removes the named shared memory object unless this instance was
// constructed as remote (fRemote set), in which case nothing is removed.
FairMQRegionSHM::~FairMQRegionSHM()
{
    if (fRemote)
    {
        return;
    }

    LOG(DEBUG) << "destroying region";
    bipc::shared_memory_object::remove(fRegionIdStr.c_str());
}

View File

@@ -0,0 +1,46 @@
/********************************************************************************
* Copyright (C) 2014 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH *
* *
* This software is distributed under the terms of the *
* GNU Lesser General Public Licence version 3 (LGPL) version 3, *
* copied verbatim in the file "LICENSE" *
********************************************************************************/
#ifndef FAIRMQREGIONSHM_H_
#define FAIRMQREGIONSHM_H_
#include "FairMQRegion.h"
#include <boost/interprocess/shared_memory_object.hpp>
#include <boost/interprocess/mapped_region.hpp>
#include <cstddef> // size_t
#include <atomic>
#include <memory>
#include <string>
/// Shared memory implementation of FairMQRegion.
///
/// A region is a named shared memory object ("fairmq_shmem_region_<id>")
/// mapped read/write into the process. The public constructor creates a new
/// region; the private one opens an existing region by id and is reachable
/// only from the befriended socket and message classes.
class FairMQRegionSHM : public FairMQRegion
{
    friend class FairMQSocketSHM;
    friend class FairMQMessageSHM;

  public:
    /// Creates and maps a new region of `size` bytes.
    /// `explicit` so that a plain size is never silently converted into a region.
    explicit FairMQRegionSHM(const size_t size);

    /// Address of the mapping in this process.
    virtual void* GetData() const override;
    /// Size of the mapping.
    virtual size_t GetSize() const override;

    /// Removes the shared memory object unless the instance is remote.
    virtual ~FairMQRegionSHM();

  private:
    /// Opens the existing region with the given id; `remote` is stored in fRemote.
    FairMQRegionSHM(const uint64_t id, bool remote);

    static std::atomic<bool> fInterrupted; ///< set/cleared by FairMQSocketSHM::Interrupt()/Resume()
    std::unique_ptr<boost::interprocess::shared_memory_object> fShmemObject; ///< named shared memory object
    std::unique_ptr<boost::interprocess::mapped_region> fRegion;             ///< mapping of fShmemObject
    uint64_t fRegionId;       ///< numeric id, also sent in MetaHeader::fRegionId
    std::string fRegionIdStr; ///< "fairmq_shmem_region_" + id
    bool fRemote;             ///< true when opened by id; the destructor then leaves the object in place
};
#endif /* FAIRMQREGIONSHM_H_ */

View File

@@ -5,11 +5,13 @@
* GNU Lesser General Public Licence version 3 (LGPL) version 3, *
* copied verbatim in the file "LICENSE" *
********************************************************************************/
#ifndef FAIRMQSHMDEVICECOUNTER_H_
#define FAIRMQSHMDEVICECOUNTER_H_
#ifndef FAIR_MQ_SHMEM_COMMON_H_
#define FAIR_MQ_SHMEM_COMMON_H_
#include <atomic>
#include <boost/interprocess/managed_shared_memory.hpp>
namespace fair
{
namespace mq
@@ -20,14 +22,39 @@ namespace shmem
struct DeviceCounter
{
DeviceCounter(unsigned int c)
: count(c)
: fCount(c)
{}
std::atomic<unsigned int> count;
std::atomic<unsigned int> fCount;
};
/// Counter of regions created so far; stored as a unique instance in the
/// management segment and used to derive region ids.
struct RegionCounter
{
    /// `explicit` prevents an integer from being implicitly converted into a counter.
    explicit RegionCounter(unsigned int c)
        : fCount(c)
    {}

    std::atomic<unsigned int> fCount; ///< current count; incremented for every new region
};
/// Marker object placed in the management segment while a monitor is running.
struct MonitorStatus
{
    /// A freshly constructed status is always active.
    MonitorStatus()
        : fActive{true}
    {
    }

    bool fActive; ///< true from construction on
};
// Header describing a shared memory payload. FairMQMessageSHM constructs it
// in place inside the ZeroMQ message buffer, and FairMQSocketSHM::Receive
// reads it back from the received ZeroMQ message.
// NOTE(review): the layout is shared between processes -- keep member order and alignment stable.
struct alignas(32) MetaHeader
{
// size of the payload, as passed to the message
uint64_t fSize;
// 0: fHandle is resolved against the Manager's segment;
// otherwise: id of the region, and fHandle is a byte offset from the region base
uint64_t fRegionId;
// segment handle, or offset into the region (see fRegionId)
boost::interprocess::managed_shared_memory::handle_t fHandle;
};
} // namespace shmem
} // namespace mq
} // namespace fair
#endif /* FAIRMQSHMDEVICECOUNTER_H_ */
#endif /* FAIR_MQ_SHMEM_COMMON_H_ */

View File

@@ -115,18 +115,42 @@ class Manager
}
}
// Removes the "fairmq_shmem_main" and "fairmq_shmem_management" shared
// memory objects, in that order, and logs for each whether it was removed.
void Remove()
{
    const bool mainRemoved = bipc::shared_memory_object::remove("fairmq_shmem_main");
    LOG(DEBUG) << (mainRemoved
        ? "shmem: successfully removed \"fairmq_shmem_main\" segment after the device has stopped."
        : "shmem: did not remove \"fairmq_shmem_main\" segment after the device stopped. Already removed?");

    const bool managementRemoved = bipc::shared_memory_object::remove("fairmq_shmem_management");
    LOG(DEBUG) << (managementRemoved
        ? "shmem: successfully removed \"fairmq_shmem_management\" segment after the device has stopped."
        : "shmem: did not remove \"fairmq_shmem_management\" segment after the device stopped. Already removed?");
}
// Gives access to the management segment owned by this manager.
bipc::managed_shared_memory& ManagementSegment()
{
    bipc::managed_shared_memory& management = fManagementSegment;
    return management;
}
private:
// Private constructor. Starts with no main segment (fSegment is null) and
// opens, or creates with 65536 bytes, the "fairmq_shmem_management" segment.
// NOTE(review): the Monitor opens the management segment with the same name and size -- keep them in sync.
Manager()
: fSegment(nullptr)
, fManagementSegment(bipc::open_or_create, "fairmq_shmem_management", 65536)
{}
Manager(const Manager&) = delete;
Manager operator=(const Manager&) = delete;
bipc::managed_shared_memory* fSegment;
};
struct alignas(16) MetaHeader
{
uint64_t fSize;
bipc::managed_shared_memory::handle_t fHandle;
bipc::managed_shared_memory fManagementSegment;
};
// class Chunk

View File

@@ -7,16 +7,18 @@
********************************************************************************/
#include "FairMQShmMonitor.h"
#include "FairMQShmDeviceCounter.h"
#include "FairMQShmCommon.h"
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/containers/string.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/interprocess/sync/named_mutex.hpp>
#include <boost/interprocess/ipc/message_queue.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <csignal>
#include <iostream>
#include <iomanip>
@@ -32,6 +34,11 @@ using String = bipc::basic_string<char, std::char_traits<char>, CharAll
using StringAllocator = bipc::allocator<String, bipc::managed_shared_memory::segment_manager>;
using StringVector = bipc::vector<String, StringAllocator>;
namespace
{
    // Last signal number received, 0 while none has arrived.
    // Written by the signal handler and polled from other threads
    // (Monitor::SignalMonitor, Monitor::Interactive). `volatile sig_atomic_t`
    // only covers handler-vs-same-thread access, so an atomic is used to make
    // the cross-thread reads well defined.
    // NOTE(review): relies on std::atomic<int> being lock-free for use inside a signal handler.
    std::atomic<int> gSignalStatus{0};
}
namespace fair
{
namespace mq
@@ -39,6 +46,11 @@ namespace mq
namespace shmem
{
// Signal handler: records the received signal number in gSignalStatus and
// does nothing else.
void signalHandler(int signum)
{
    gSignalStatus = signum;
}
Monitor::Monitor(const string& segmentName, bool selfDestruct, bool interactive, unsigned int timeoutInMS)
: fSelfDestruct(selfDestruct)
, fInteractive(interactive)
@@ -48,15 +60,43 @@ Monitor::Monitor(const string& segmentName, bool selfDestruct, bool interactive,
, fTerminating(false)
, fHeartbeatTriggered(false)
, fLastHeartbeat()
, fHeartbeatThread()
, fSignalThread()
, fManagementSegment(bipc::open_or_create, "fairmq_shmem_management", 65536)
{
if (bipc::message_queue::remove("fairmq_shmem_control_queue"))
MonitorStatus* monitorStatus = fManagementSegment.find<MonitorStatus>(bipc::unique_instance).first;
if (monitorStatus != nullptr)
{
// cout << "successfully removed control queue" << endl;
cout << "shmmonitor already started or not properly exited. Try `shmmonitor --cleanup`" << endl;
exit(EXIT_FAILURE);
}
else
fManagementSegment.construct<MonitorStatus>(bipc::unique_instance)();
CleanupControlQueues();
}
// Installs signalHandler for SIGINT and SIGTERM and starts the thread that
// watches for a recorded signal (Monitor::SignalMonitor).
void Monitor::CatchSignals()
{
    const int handledSignals[] = {SIGINT, SIGTERM};
    for (const int sig : handledSignals)
    {
        signal(sig, signalHandler);
    }

    fSignalThread = thread(&Monitor::SignalMonitor, this);
}
// Body of the signal thread. Polls every 100 ms until either a signal has
// been recorded -- then fTerminating is set and the signal number printed --
// or fTerminating has been set elsewhere.
void Monitor::SignalMonitor()
{
    for (;;)
    {
        if (gSignalStatus != 0)
        {
            fTerminating = true;
            cout << "signal: " << gSignalStatus << endl;
            return;
        }

        if (fTerminating)
        {
            return;
        }

        this_thread::sleep_for(chrono::milliseconds(100));
    }
}
@@ -109,22 +149,15 @@ void Monitor::MonitorHeartbeats()
cout << ie.what() << endl;
}
if (bipc::message_queue::remove("fairmq_shmem_control_queue"))
{
// cout << "successfully removed control queue" << endl;
}
else
{
cout << "could not remove control queue" << endl;
}
CleanupControlQueues();
}
void Monitor::Interactive()
{
char input;
pollfd inputFd[1];
inputFd[0].fd = fileno(stdin);
inputFd[0].events = POLLIN;
char c;
pollfd cinfd[1];
cinfd[0].fd = fileno(stdin);
cinfd[0].events = POLLIN;
struct termios t;
tcgetattr(STDIN_FILENO, &t); // get the current terminal I/O structure
@@ -138,11 +171,16 @@ void Monitor::Interactive()
while (!fTerminating)
{
if (poll(inputFd, 1, 100))
if (poll(cinfd, 1, 100))
{
input = getchar();
if (fTerminating || gSignalStatus != 0)
{
break;
}
switch (input)
c = getchar();
switch (c)
{
case 'q':
cout << "[q] --> quitting." << endl;
@@ -165,7 +203,7 @@ void Monitor::Interactive()
cout << "[\\n] --> invalid input." << endl;
break;
default:
cout << "[" << input << "] --> invalid input." << endl;
cout << "[" << c << "] --> invalid input." << endl;
break;
}
@@ -177,6 +215,11 @@ void Monitor::Interactive()
PrintHeader();
}
if (fTerminating)
{
break;
}
CheckSegment();
if (!fTerminating)
@@ -228,10 +271,10 @@ void Monitor::CheckSegment()
unsigned int numDevices = 0;
pair<fair::mq::shmem::DeviceCounter*, size_t> result = segment.find<fair::mq::shmem::DeviceCounter>(bipc::unique_instance);
if (result.first != nullptr)
fair::mq::shmem::DeviceCounter* dc = segment.find<fair::mq::shmem::DeviceCounter>(bipc::unique_instance).first;
if (dc)
{
numDevices = result.first->count;
numDevices = dc->fCount;
}
auto now = chrono::high_resolution_clock::now();
@@ -294,13 +337,57 @@ void Monitor::CheckSegment()
void Monitor::Cleanup(const string& segmentName)
{
if (bipc::shared_memory_object::remove(segmentName.c_str()))
try
{
cout << "Successfully removed shared memory \"" << segmentName.c_str() << "\"." << endl;
bipc::managed_shared_memory managementSegment(bipc::open_only, "fairmq_shmem_management");
RegionCounter* rc = managementSegment.find<RegionCounter>(bipc::unique_instance).first;
if (rc)
{
cout << "Region counter found: " << rc->fCount << endl;
unsigned int regionCount = rc->fCount;
for (int i = 1; i <= regionCount; ++i)
{
RemoveObject("fairmq_shmem_region_" + to_string(regionCount));
}
}
else
{
cout << "shmem: no region counter found. no regions to cleanup." << endl;
}
RemoveObject("fairmq_shmem_management");
}
catch (bipc::interprocess_exception& ie)
{
cout << "Did not find \"fairmq_shmem_management\" shared memory segment. No regions to cleanup." << endl;
}
RemoveObject(segmentName);
boost::interprocess::named_mutex::remove("fairmq_shmem_mutex");
}
// Removes the shared memory object called `name` and prints one line
// stating whether the removal succeeded.
void Monitor::RemoveObject(const std::string& name)
{
    if (bipc::shared_memory_object::remove(name.c_str()))
    {
        cout << "Successfully removed \"" << name << "\" shared memory segment." << endl;
    }
    else
    {
        // Only the message naming the segment is printed; the generic variant said the same thing twice.
        cout << "Did not remove \"" << name << "\" shared memory segment. Already removed?" << endl;
    }
}
// Removes the "fairmq_shmem_control_queue" message queue. The outcome is
// deliberately not reported: neither success nor failure prints anything.
void Monitor::CleanupControlQueues()
{
    const bool removed = bipc::message_queue::remove("fairmq_shmem_control_queue");
    static_cast<void>(removed);
}
@@ -311,19 +398,19 @@ void Monitor::PrintQueues()
try
{
bipc::managed_shared_memory segment(bipc::open_only, fSegmentName.c_str());
pair<StringVector*, size_t> queues = segment.find<StringVector>("fairmq_shmem_queues");
if (queues.first != nullptr)
StringVector* queues = segment.find<StringVector>("fairmq_shmem_queues").first;
if (queues)
{
cout << "found " << queues.first->size() << " queue(s):" << endl;
cout << "found " << queues->size() << " queue(s):" << endl;
for (int i = 0; i < queues.first->size(); ++i)
for (int i = 0; i < queues->size(); ++i)
{
string name(queues.first->at(i).c_str());
string name(queues->at(i).c_str());
cout << '\t' << name << " : ";
pair<atomic<int>*, size_t> queueSize = segment.find<atomic<int>>(name.c_str());
if (queueSize.first != nullptr)
atomic<int>* queueSize = segment.find<atomic<int>>(name.c_str()).first;
if (queueSize)
{
cout << *(queueSize.first) << " messages" << endl;
cout << *queueSize << " messages" << endl;
}
else
{
@@ -368,6 +455,15 @@ void Monitor::PrintHelp()
cout << "controls: [x] close memory, [p] print queues, [h] help, [q] quit." << endl;
}
// Destroys the MonitorStatus marker in the management segment and joins the
// signal thread.
Monitor::~Monitor()
{
    fManagementSegment.destroy<MonitorStatus>(bipc::unique_instance);

    // The signal thread only leaves its loop once a signal has arrived or
    // fTerminating is set. Set the flag here so that the join below cannot block
    // forever when the monitor is destroyed without either having happened
    // (e.g. during exception unwinding).
    fTerminating = true;

    if (fSignalThread.joinable())
    {
        fSignalThread.join();
    }
}
} // namespace shmem
} // namespace mq
} // namespace fair

View File

@@ -5,8 +5,10 @@
* GNU Lesser General Public Licence version 3 (LGPL) version 3, *
* copied verbatim in the file "LICENSE" *
********************************************************************************/
#ifndef FAIRMQSHMMONITOR_H_
#define FAIRMQSHMMONITOR_H_
#ifndef FAIR_MQ_SHMEM_MONITOR_H_
#define FAIR_MQ_SHMEM_MONITOR_H_
#include <boost/interprocess/managed_shared_memory.hpp>
#include <thread>
#include <chrono>
@@ -28,11 +30,13 @@ class Monitor
Monitor(const Monitor&) = delete;
Monitor operator=(const Monitor&) = delete;
void CatchSignals();
void Run();
virtual ~Monitor() {}
virtual ~Monitor();
static void Cleanup(const std::string& segmentName);
static void CleanupControlQueues();
private:
void PrintHeader();
@@ -41,6 +45,8 @@ class Monitor
void MonitorHeartbeats();
void CheckSegment();
void Interactive();
void SignalMonitor();
static void RemoveObject(const std::string&);
bool fSelfDestruct; // will self-destruct after the memory has been closed
bool fInteractive; // running in interactive mode
@@ -50,11 +56,12 @@ class Monitor
std::atomic<bool> fTerminating;
std::atomic<bool> fHeartbeatTriggered;
std::chrono::high_resolution_clock::time_point fLastHeartbeat;
std::thread fHeartbeatThread;
std::thread fSignalThread;
boost::interprocess::managed_shared_memory fManagementSegment;
};
} // namespace shmem
} // namespace mq
} // namespace fair
#endif /* FAIRMQSHMMONITOR_H_ */
#endif /* FAIR_MQ_SHMEM_MONITOR_H_ */

View File

@@ -11,7 +11,9 @@
#include "FairMQSocketSHM.h"
#include "FairMQMessageSHM.h"
#include "FairMQRegionSHM.h"
#include "FairMQLogger.h"
#include "FairMQShmCommon.h"
using namespace std;
using namespace fair::mq::shmem;
@@ -173,24 +175,22 @@ int FairMQSocketSHM::Receive(FairMQMessagePtr& msg, const int flags)
// ShPtrOwner* owner = Manager::Instance().Segment()->find<ShPtrOwner>(ownerID.c_str()).first;
MetaHeader* hdr = static_cast<MetaHeader*>(zmq_msg_data(msgPtr));
size_t size = 0;
if (hdr->fHandle)
{
static_cast<FairMQMessageSHM*>(msg.get())->fHandle = hdr->fHandle;
static_cast<FairMQMessageSHM*>(msg.get())->fChunkSize = hdr->fSize;
// static_cast<FairMQMessageSHM*>(msg.get())->fOwner = owner;
// static_cast<FairMQMessageSHM*>(msg.get())->fReceiving = true;
size = msg->GetSize();
static_cast<FairMQMessageSHM*>(msg.get())->fHandle = hdr->fHandle;
static_cast<FairMQMessageSHM*>(msg.get())->fSize = hdr->fSize;
static_cast<FairMQMessageSHM*>(msg.get())->fRegionId = hdr->fRegionId;
// static_cast<FairMQMessageSHM*>(msg.get())->fOwner = owner;
// static_cast<FairMQMessageSHM*>(msg.get())->fReceiving = true;
size = msg->GetSize();
fBytesRx += size;
++fMessagesRx;
fBytesRx += size;
++fMessagesRx;
return size;
}
else
{
LOG(ERROR) << "Received meta data, but could not find corresponding chunk";
return -1;
}
return size;
// else
// {
// LOG(ERROR) << "Received meta data, but could not find corresponding chunk";
// return -1;
// }
}
else if (zmq_errno() == EAGAIN)
{
@@ -238,7 +238,6 @@ int64_t FairMQSocketSHM::Send(vector<FairMQMessagePtr>& msgVec, const int flags)
{
static_cast<FairMQMessageSHM*>(msgVec[i].get())->fQueued = true;
// static_cast<FairMQMessageSHM*>(msgVec[i].get())->fReceiving = false;
// static_cast<FairMQMessageSHM*>(msgVec[i].get())->fQueued = true;
size_t size = msgVec[i]->GetSize();
totalSize += size;
@@ -327,23 +326,21 @@ int64_t FairMQSocketSHM::Receive(vector<FairMQMessagePtr>& msgVec, const int fla
// ShPtrOwner* owner = Manager::Instance().Segment()->find<ShPtrOwner>(ownerID.c_str()).first;
MetaHeader* hdr = static_cast<MetaHeader*>(zmq_msg_data(msgPtr));
size_t size = 0;
if (hdr->fHandle)
{
static_cast<FairMQMessageSHM*>(part.get())->fHandle = hdr->fHandle;
static_cast<FairMQMessageSHM*>(part.get())->fChunkSize = hdr->fSize;
// static_cast<FairMQMessageSHM*>(msg.get())->fOwner = owner;
// static_cast<FairMQMessageSHM*>(msg.get())->fReceiving = true;
size = part->GetSize();
static_cast<FairMQMessageSHM*>(part.get())->fHandle = hdr->fHandle;
static_cast<FairMQMessageSHM*>(part.get())->fSize = hdr->fSize;
static_cast<FairMQMessageSHM*>(part.get())->fRegionId = hdr->fRegionId;
// static_cast<FairMQMessageSHM*>(part.get())->fOwner = owner;
// static_cast<FairMQMessageSHM*>(part.get())->fReceiving = true;
size = part->GetSize();
msgVec.push_back(move(part));
msgVec.push_back(move(part));
totalSize += size;
}
else
{
LOG(ERROR) << "Received meta data, but could not find corresponding chunk";
return -1;
}
totalSize += size;
// else
// {
// LOG(ERROR) << "Received meta data, but could not find corresponding chunk";
// return -1;
// }
}
else if (zmq_errno() == EAGAIN)
{
@@ -399,12 +396,14 @@ void FairMQSocketSHM::Close()
// Raises the interrupted flag of the message class, the region class and the
// socket class. Counterpart of Resume().
void FairMQSocketSHM::Interrupt()
{
FairMQMessageSHM::fInterrupted = true;
FairMQRegionSHM::fInterrupted = true;
fInterrupted = true;
}
// Clears the interrupted flag of the message class, the region class and the
// socket class. Counterpart of Interrupt().
void FairMQSocketSHM::Resume()
{
    FairMQMessageSHM::fInterrupted = false;
    // Must be cleared as well; leaving it set would keep regions interrupted after a resume.
    FairMQRegionSHM::fInterrupted = false;
    fInterrupted = false;
}

View File

@@ -13,6 +13,7 @@
#include <zmq.h>
#include <boost/version.hpp>
#include <boost/filesystem.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/ipc/message_queue.hpp>
@@ -21,10 +22,13 @@
#include <boost/interprocess/sync/scoped_lock.hpp>
#include <chrono>
#include <thread>
#include <cstdlib> // std::system
using namespace std;
using namespace fair::mq::shmem;
namespace bipc = boost::interprocess;
namespace bfs = boost::filesystem;
namespace bpt = boost::posix_time;
FairMQ::Transport FairMQTransportFactorySHM::fTransportType = FairMQ::Transport::SHM;
@@ -75,30 +79,84 @@ FairMQTransportFactorySHM::FairMQTransportFactorySHM(const string& id, const Fai
LOG(ERROR) << "shmem: failed configuring context, reason: " << zmq_strerror(errno);
}
fSendHeartbeats = true;
fHeartbeatThread = thread(&FairMQTransportFactorySHM::SendHeartbeats, this);
Manager::Instance().InitializeSegment("open_or_create", segmentName, segmentSize);
LOG(DEBUG) << "shmem: created/opened shared memory segment of " << segmentSize << " bytes. Available are " << Manager::Instance().Segment()->get_free_memory() << " bytes.";
{ // mutex scope
{
bipc::scoped_lock<bipc::named_mutex> lock(fShMutex);
pair<DeviceCounter*, size_t> result = Manager::Instance().Segment()->find<DeviceCounter>(bipc::unique_instance);
if (result.first != nullptr)
fDeviceCounter = Manager::Instance().Segment()->find<DeviceCounter>(bipc::unique_instance).first;
if (fDeviceCounter)
{
fDeviceCounter = result.first;
LOG(DEBUG) << "shmem: device counter found, with value of " << fDeviceCounter->count << ". incrementing.";
(fDeviceCounter->count)++;
LOG(DEBUG) << "shmem: incremented device counter, now: " << fDeviceCounter->count;
LOG(DEBUG) << "shmem: device counter found, with value of " << fDeviceCounter->fCount << ". incrementing.";
(fDeviceCounter->fCount)++;
LOG(DEBUG) << "shmem: incremented device counter, now: " << fDeviceCounter->fCount;
}
else
{
LOG(DEBUG) << "shmem: no device counter found, creating one and initializing with 1";
fDeviceCounter = Manager::Instance().Segment()->construct<DeviceCounter>(bipc::unique_instance)(1);
LOG(DEBUG) << "shmem: initialized device counter with: " << fDeviceCounter->count;
LOG(DEBUG) << "shmem: initialized device counter with: " << fDeviceCounter->fCount;
}
// start shm monitor
// try
// {
// MonitorStatus* monitorStatus = fManagementSegment.find<MonitorStatus>(bipc::unique_instance).first;
// if (monitorStatus == nullptr)
// {
// LOG(DEBUG) << "shmem: no shmmonitor found, starting...";
// StartMonitor();
// }
// else
// {
// LOG(DEBUG) << "shmem: found shmmonitor in fairmq_shmem_management.";
// }
// }
// catch (std::exception& e)
// {
// LOG(ERROR) << "shmem: Exception during shmmonitor initialization: " << e.what() << ", application will now exit";
// exit(EXIT_FAILURE);
// }
}
fSendHeartbeats = true;
fHeartbeatThread = thread(&FairMQTransportFactorySHM::SendHeartbeats, this);
}
// Launches "shmmonitor --self-destruct" in the background and waits until a
// MonitorStatus object shows up in the management segment. Polls every 10 ms;
// after more than 100 unsuccessful polls an error is logged and the process exits.
// NOTE(review): the existence check looks up the relative path "shmmonitor",
// while the error text talks about PATH -- confirm which is intended.
void FairMQTransportFactorySHM::StartMonitor()
{
    if (!bfs::exists(bfs::path("shmmonitor")))
    {
        LOG(ERROR) << "Could not find shmmonitor. Is it in the PATH? Monitor not started";
        return;
    }

    // TODO: replace with Boost.Process once boost 1.64 is available
    const int r = system("shmmonitor --self-destruct &");
    LOG(DEBUG) << r;

    int numTries = 0;
    while (!Manager::Instance().ManagementSegment().find<MonitorStatus>(bipc::unique_instance).first)
    {
        this_thread::sleep_for(std::chrono::milliseconds(10));
        if (++numTries > 100)
        {
            LOG(ERROR) << "Did not get response from shmmonitor after " << 10 * 100 << " milliseconds. Exiting.";
            exit(EXIT_FAILURE);
        }
    }

    LOG(DEBUG) << "shmem: shmmonitor started";
}
void FairMQTransportFactorySHM::SendHeartbeats()
@@ -142,6 +200,11 @@ FairMQMessagePtr FairMQTransportFactorySHM::CreateMessage(void* data, const size
return unique_ptr<FairMQMessage>(new FairMQMessageSHM(data, size, ffn, hint));
}
// Creates a shared memory message referring to `size` bytes at `data`
// inside `region`.
FairMQMessagePtr FairMQTransportFactorySHM::CreateMessage(FairMQRegionPtr& region, void* data, const size_t size) const
{
    unique_ptr<FairMQMessage> message(new FairMQMessageSHM(region, data, size));
    return message;
}
FairMQSocketPtr FairMQTransportFactorySHM::CreateSocket(const string& type, const string& name) const
{
assert(fContext);
@@ -168,6 +231,11 @@ FairMQPollerPtr FairMQTransportFactorySHM::CreatePoller(const FairMQSocket& cmdS
return unique_ptr<FairMQPoller>(new FairMQPollerSHM(cmdSocket, dataSocket));
}
// Creates a new shared memory region of `size` bytes.
FairMQRegionPtr FairMQTransportFactorySHM::CreateRegion(const size_t size) const
{
    unique_ptr<FairMQRegion> region(new FairMQRegionSHM(size));
    return region;
}
FairMQTransportFactorySHM::~FairMQTransportFactorySHM()
{
fSendHeartbeats = false;
@@ -196,24 +264,17 @@ FairMQTransportFactorySHM::~FairMQTransportFactorySHM()
{ // mutex scope
bipc::scoped_lock<bipc::named_mutex> lock(fShMutex);
(fDeviceCounter->count)--;
(fDeviceCounter->fCount)--;
if (fDeviceCounter->count == 0)
if (fDeviceCounter->fCount == 0)
{
LOG(DEBUG) << "shmem: last 'fairmq_shmem_main' user, removing segment.";
if (bipc::shared_memory_object::remove("fairmq_shmem_main"))
{
LOG(DEBUG) << "shmem: successfully removed shared memory segment after the device has stopped.";
}
else
{
LOG(DEBUG) << "shmem: did not remove shared memory segment after the device stopped. Already removed?";
}
Manager::Instance().Remove();
}
else
{
LOG(DEBUG) << "shmem: other 'fairmq_shmem_main' users present (" << fDeviceCounter->count << "), not removing it.";
LOG(DEBUG) << "shmem: other 'fairmq_shmem_main' users present (" << fDeviceCounter->fCount << "), not removing it.";
}
}
}

View File

@@ -13,8 +13,9 @@
#include "FairMQMessageSHM.h"
#include "FairMQSocketSHM.h"
#include "FairMQPollerSHM.h"
#include "FairMQShmDeviceCounter.h"
#include "FairMQShmCommon.h"
#include <options/FairMQProgOptions.h>
#include "FairMQRegionSHM.h"
#include <vector>
#include <string>
@@ -23,15 +24,17 @@
#include <boost/interprocess/sync/named_mutex.hpp>
class FairMQTransportFactorySHM : public FairMQTransportFactory
{
public:
FairMQTransportFactorySHM(const std::string& id = "", const FairMQProgOptions* config = nullptr);
FairMQTransportFactorySHM(const FairMQTransportFactorySHM&) = delete;
FairMQTransportFactorySHM operator=(const FairMQTransportFactorySHM&) = delete;
FairMQMessagePtr CreateMessage() const override;
FairMQMessagePtr CreateMessage(const size_t size) const override;
FairMQMessagePtr CreateMessage(void* data, const size_t size, fairmq_free_fn* ffn, void* hint = nullptr) const override;
FairMQMessagePtr CreateMessage(FairMQRegionPtr& region, void* data, const size_t size) const override;
FairMQSocketPtr CreateSocket(const std::string& type, const std::string& name) const override;
@@ -40,13 +43,16 @@ class FairMQTransportFactorySHM : public FairMQTransportFactory
FairMQPollerPtr CreatePoller(const std::unordered_map<std::string, std::vector<FairMQChannel>>& channelsMap, const std::vector<std::string>& channelList) const override;
FairMQPollerPtr CreatePoller(const FairMQSocket& cmdSocket, const FairMQSocket& dataSocket) const override;
FairMQ::Transport GetType() const override;
FairMQRegionPtr CreateRegion(const size_t size) const override;
void SendHeartbeats();
FairMQ::Transport GetType() const override;
~FairMQTransportFactorySHM() override;
private:
void SendHeartbeats();
void StartMonitor();
static FairMQ::Transport fTransportType;
void* fContext;
void* fHeartbeatSocket;

View File

@@ -6,5 +6,3 @@ The transport manages shared memory via boost::interprocess library. The transfe
Under development:
- Cleanup of the shared memory segment in case all devices crash. Currently at least one device has to stop properly for a cleanup.
- Implement more than one transport per device.
- Configuration of the shared memory size (currently hard-coded).

View File

@@ -49,6 +49,7 @@ int main(int argc, char** argv)
{
cout << "Cleaning up \"" << segmentName << "\"..." << endl;
fair::mq::shmem::Monitor::Cleanup(segmentName);
fair::mq::shmem::Monitor::CleanupControlQueues();
return 0;
}
@@ -56,6 +57,7 @@ int main(int argc, char** argv)
fair::mq::shmem::Monitor monitor{segmentName, selfDestruct, interactive, timeoutInMS};
monitor.CatchSignals();
monitor.Run();
}
catch (exception& e)