mirror of
https://github.com/FairRootGroup/FairMQ.git
synced 2025-10-13 08:41:16 +00:00
shm: allow monitor::ResetContent to cleanup after a crash
This commit is contained in:
parent
2f82eb4f09
commit
1a75141fc4
|
@ -133,6 +133,7 @@ struct RegionConfig
|
||||||
bool removeOnDestruction = true; /// remove the region on object destruction
|
bool removeOnDestruction = true; /// remove the region on object destruction
|
||||||
int creationFlags = 0; /// flags passed to the underlying transport on region creation
|
int creationFlags = 0; /// flags passed to the underlying transport on region creation
|
||||||
int64_t userFlags = 0; /// custom flags that have no effect on the transport, but can be retrieved from the region by the user
|
int64_t userFlags = 0; /// custom flags that have no effect on the transport, but can be retrieved from the region by the user
|
||||||
|
uint64_t size = 0; /// region size
|
||||||
std::string path = ""; /// file path, if the region is backed by a file
|
std::string path = ""; /// file path, if the region is backed by a file
|
||||||
std::optional<uint16_t> id = std::nullopt; /// region id
|
std::optional<uint16_t> id = std::nullopt; /// region id
|
||||||
uint32_t linger = 100; /// delay in ms before region destruction to collect outstanding events
|
uint32_t linger = 100; /// delay in ms before region destruction to collect outstanding events
|
||||||
|
|
|
@ -28,6 +28,8 @@
|
||||||
namespace fair::mq::shmem
|
namespace fair::mq::shmem
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/// Size passed when creating or opening the management segment ("fmq_<shmId>_mng").
/// Shared by every place that opens the segment so the value cannot drift between them.
/// `inline` gives the header-defined constant a single definition across translation units.
inline constexpr uint64_t kManagementSegmentSize = 6553600;
|
||||||
|
|
||||||
struct SharedMemoryError : std::runtime_error { using std::runtime_error::runtime_error; };
|
struct SharedMemoryError : std::runtime_error { using std::runtime_error::runtime_error; };
|
||||||
|
|
||||||
using SimpleSeqFitSegment = boost::interprocess::basic_managed_shared_memory<char,
|
using SimpleSeqFitSegment = boost::interprocess::basic_managed_shared_memory<char,
|
||||||
|
|
|
@ -132,7 +132,7 @@ class Manager
|
||||||
: fShmId64(config ? config->GetProperty<uint64_t>("shmid", makeShmIdUint64(sessionName)) : makeShmIdUint64(sessionName))
|
: fShmId64(config ? config->GetProperty<uint64_t>("shmid", makeShmIdUint64(sessionName)) : makeShmIdUint64(sessionName))
|
||||||
, fShmId(makeShmIdStr(fShmId64))
|
, fShmId(makeShmIdStr(fShmId64))
|
||||||
, fSegmentId(config ? config->GetProperty<uint16_t>("shm-segment-id", 0) : 0)
|
, fSegmentId(config ? config->GetProperty<uint16_t>("shm-segment-id", 0) : 0)
|
||||||
, fManagementSegment(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mng").c_str(), 6553600)
|
, fManagementSegment(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mng").c_str(), kManagementSegmentSize)
|
||||||
, fShmVoidAlloc(fManagementSegment.get_segment_manager())
|
, fShmVoidAlloc(fManagementSegment.get_segment_manager())
|
||||||
, fShmMtx(fManagementSegment.find_or_construct<boost::interprocess::interprocess_mutex>(boost::interprocess::unique_instance)())
|
, fShmMtx(fManagementSegment.find_or_construct<boost::interprocess::interprocess_mutex>(boost::interprocess::unique_instance)())
|
||||||
, fNumObservedEvents(0)
|
, fNumObservedEvents(0)
|
||||||
|
|
|
@ -6,9 +6,10 @@
|
||||||
* copied verbatim in the file "LICENSE" *
|
* copied verbatim in the file "LICENSE" *
|
||||||
********************************************************************************/
|
********************************************************************************/
|
||||||
|
|
||||||
#include "Monitor.h"
|
|
||||||
#include "Common.h"
|
#include "Common.h"
|
||||||
#include "UnmanagedRegion.h"
|
#include "Monitor.h"
|
||||||
|
#include "Segment.h"
|
||||||
|
#include <fairmq/shmem/UnmanagedRegion.h>
|
||||||
|
|
||||||
#include <fairmq/tools/IO.h>
|
#include <fairmq/tools/IO.h>
|
||||||
#include <fairmq/tools/Strings.h>
|
#include <fairmq/tools/Strings.h>
|
||||||
|
@ -415,24 +416,28 @@ void Monitor::PrintDebugInfo(const ShmId& shmId __attribute__((unused)))
|
||||||
|
|
||||||
size_t numMessages = 0;
|
size_t numMessages = 0;
|
||||||
|
|
||||||
for (const auto& e : *debug) {
|
if (debug) {
|
||||||
numMessages += e.second.size();
|
for (const auto& e : *debug) {
|
||||||
}
|
numMessages += e.second.size();
|
||||||
LOG(info) << endl << "found " << numMessages << " messages.";
|
|
||||||
|
|
||||||
for (const auto& s : *debug) {
|
|
||||||
for (const auto& e : s.second) {
|
|
||||||
using time_point = chrono::system_clock::time_point;
|
|
||||||
time_point tmpt{chrono::duration_cast<time_point::duration>(chrono::nanoseconds(e.second.fCreationTime))};
|
|
||||||
time_t t = chrono::system_clock::to_time_t(tmpt);
|
|
||||||
uint64_t ms = e.second.fCreationTime % 1000000;
|
|
||||||
auto tm = localtime(&t);
|
|
||||||
LOG(info) << "segment: " << setw(3) << setfill(' ') << s.first
|
|
||||||
<< ", offset: " << setw(12) << setfill(' ') << e.first
|
|
||||||
<< ", size: " << setw(10) << setfill(' ') << e.second.fSize
|
|
||||||
<< ", creator PID: " << e.second.fPid << setfill('0')
|
|
||||||
<< ", at: " << setw(2) << tm->tm_hour << ":" << setw(2) << tm->tm_min << ":" << setw(2) << tm->tm_sec << "." << setw(6) << ms;
|
|
||||||
}
|
}
|
||||||
|
LOG(info) << endl << "found " << numMessages << " messages.";
|
||||||
|
|
||||||
|
for (const auto& s : *debug) {
|
||||||
|
for (const auto& e : s.second) {
|
||||||
|
using time_point = chrono::system_clock::time_point;
|
||||||
|
time_point tmpt{chrono::duration_cast<time_point::duration>(chrono::nanoseconds(e.second.fCreationTime))};
|
||||||
|
time_t t = chrono::system_clock::to_time_t(tmpt);
|
||||||
|
uint64_t ms = e.second.fCreationTime % 1000000;
|
||||||
|
auto tm = localtime(&t);
|
||||||
|
LOG(info) << "segment: " << setw(3) << setfill(' ') << s.first
|
||||||
|
<< ", offset: " << setw(12) << setfill(' ') << e.first
|
||||||
|
<< ", size: " << setw(10) << setfill(' ') << e.second.fSize
|
||||||
|
<< ", creator PID: " << e.second.fPid << setfill('0')
|
||||||
|
<< ", at: " << setw(2) << tm->tm_hour << ":" << setw(2) << tm->tm_min << ":" << setw(2) << tm->tm_sec << "." << setw(6) << ms;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
LOG(info) << "no debug data found";
|
||||||
}
|
}
|
||||||
} catch (bie&) {
|
} catch (bie&) {
|
||||||
LOG(info) << "no segments found";
|
LOG(info) << "no segments found";
|
||||||
|
@ -463,11 +468,16 @@ unordered_map<uint16_t, std::vector<BufferDebugInfo>> Monitor::GetDebugInfo(cons
|
||||||
|
|
||||||
result.reserve(debug->size());
|
result.reserve(debug->size());
|
||||||
|
|
||||||
for (const auto& s : *debug) {
|
|
||||||
result[s.first].reserve(s.second.size());
|
if (debug) {
|
||||||
for (const auto& e : s.second) {
|
for (const auto& s : *debug) {
|
||||||
result[s.first][e.first] = BufferDebugInfo(e.first, e.second.fPid, e.second.fSize, e.second.fCreationTime);
|
result[s.first].reserve(s.second.size());
|
||||||
|
for (const auto& e : s.second) {
|
||||||
|
result[s.first][e.first] = BufferDebugInfo(e.first, e.second.fPid, e.second.fSize, e.second.fCreationTime);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
LOG(info) << "no debug data found";
|
||||||
}
|
}
|
||||||
} catch (bie&) {
|
} catch (bie&) {
|
||||||
LOG(info) << "no segments found";
|
LOG(info) << "no segments found";
|
||||||
|
@ -701,6 +711,43 @@ void Monitor::ResetContent(const SessionId& sessionId, bool verbose /* = true */
|
||||||
ResetContent(shmId, verbose);
|
ResetContent(shmId, verbose);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Monitor::ResetContent(const ShmId& shmIdT, const std::vector<SegmentConfig>& segmentCfgs, const std::vector<RegionConfig>& regionCfgs, bool verbose /* = true */)
|
||||||
|
{
|
||||||
|
using namespace boost::interprocess;
|
||||||
|
|
||||||
|
std::string shmId = shmIdT.shmId;
|
||||||
|
std::string managementSegmentName("fmq_" + shmId + "_mng");
|
||||||
|
// reset managed segments
|
||||||
|
ResetContent(shmIdT, verbose);
|
||||||
|
// delete management segment
|
||||||
|
Remove<bipc::shared_memory_object>(managementSegmentName, verbose);
|
||||||
|
// recreate management segment
|
||||||
|
managed_shared_memory mngSegment(create_only, managementSegmentName.c_str(), kManagementSegmentSize);
|
||||||
|
// fill management segment with segment & region infos
|
||||||
|
for (const auto& s : segmentCfgs) {
|
||||||
|
if (s.allocationAlgorithm == "rbtree_best_fit") {
|
||||||
|
Segment::Register(shmId, s.id, AllocationAlgorithm::rbtree_best_fit);
|
||||||
|
} else if (s.allocationAlgorithm == "simple_seq_fit") {
|
||||||
|
Segment::Register(shmId, s.id, AllocationAlgorithm::simple_seq_fit);
|
||||||
|
} else {
|
||||||
|
LOG(error) << "Unknown allocation algorithm provided: " << s.allocationAlgorithm;
|
||||||
|
throw MonitorError("Unknown allocation algorithm provided: " + s.allocationAlgorithm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (const auto& r : regionCfgs) {
|
||||||
|
fair::mq::shmem::UnmanagedRegion::Register(shmId, r);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Monitor::ResetContent(const SessionId& sessionId, const std::vector<SegmentConfig>& segmentCfgs, const std::vector<RegionConfig>& regionCfgs, bool verbose /* = true */)
|
||||||
|
{
|
||||||
|
ShmId shmId{makeShmIdStr(sessionId.sessionId)};
|
||||||
|
if (verbose) {
|
||||||
|
cout << "ResetContent called with session id '" << sessionId.sessionId << "', translating to shared memory id '" << shmId.shmId << "'" << endl;
|
||||||
|
}
|
||||||
|
ResetContent(shmId, segmentCfgs, regionCfgs, verbose);
|
||||||
|
}
|
||||||
|
|
||||||
Monitor::~Monitor()
|
Monitor::~Monitor()
|
||||||
{
|
{
|
||||||
if (fSignalThread.joinable()) {
|
if (fSignalThread.joinable()) {
|
||||||
|
|
|
@ -8,6 +8,8 @@
|
||||||
#ifndef FAIR_MQ_SHMEM_MONITOR_H_
|
#ifndef FAIR_MQ_SHMEM_MONITOR_H_
|
||||||
#define FAIR_MQ_SHMEM_MONITOR_H_
|
#define FAIR_MQ_SHMEM_MONITOR_H_
|
||||||
|
|
||||||
|
#include <fairmq/UnmanagedRegion.h>
|
||||||
|
|
||||||
#include <fairlogger/Logger.h>
|
#include <fairlogger/Logger.h>
|
||||||
|
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
@ -49,6 +51,13 @@ struct BufferDebugInfo
|
||||||
uint64_t fCreationTime;
|
uint64_t fCreationTime;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Describes one managed segment to be re-registered by Monitor::ResetContent.
/// Scalar members are zero-initialized so a default-constructed config never
/// carries indeterminate values; aggregate initialization keeps working.
struct SegmentConfig
{
    uint16_t id = 0; /// segment id
    uint64_t size = 0; /// segment size
    std::string allocationAlgorithm; /// allocation algorithm name ("rbtree_best_fit" or "simple_seq_fit")
};
|
||||||
|
|
||||||
class Monitor
|
class Monitor
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -88,6 +97,14 @@ class Monitor
|
||||||
/// @param sessionId session id
|
/// @param sessionId session id
|
||||||
/// Only call this when segment is not in use
|
/// Only call this when segment is not in use
|
||||||
static void ResetContent(const SessionId& sessionId, bool verbose = true);
|
static void ResetContent(const SessionId& sessionId, bool verbose = true);
|
||||||
|
/// @brief [EXPERIMENTAL] clean up the content of the shmem segment, without recreating it
|
||||||
|
/// @param shmId shared memory id
|
||||||
|
/// Only call this when segment is not in use
|
||||||
|
static void ResetContent(const ShmId& shmId, const std::vector<SegmentConfig>& segmentCfgs, const std::vector<RegionConfig>& regionCfgs, bool verbose = true);
|
||||||
|
/// @brief [EXPERIMENTAL] clean up the content of the shmem segment, without recreating it
|
||||||
|
/// @param sessionId session id
|
||||||
|
/// Only call this when segment is not in use
|
||||||
|
static void ResetContent(const SessionId& sessionId, const std::vector<SegmentConfig>& segmentCfgs, const std::vector<RegionConfig>& regionCfgs, bool verbose = true);
|
||||||
|
|
||||||
/// @brief Outputs list of messages in shmem (if compiled with FAIRMQ_DEBUG_MODE=ON)
|
/// @brief Outputs list of messages in shmem (if compiled with FAIRMQ_DEBUG_MODE=ON)
|
||||||
/// @param shmId shmem id
|
/// @param shmId shmem id
|
||||||
|
|
|
@ -26,6 +26,8 @@ static const RBTreeBestFit rbTreeBestFit = RBTreeBestFit();
|
||||||
|
|
||||||
struct Segment
|
struct Segment
|
||||||
{
|
{
|
||||||
|
friend class Monitor;
|
||||||
|
|
||||||
Segment(const std::string& shmId, uint16_t id, size_t size, SimpleSeqFit)
|
Segment(const std::string& shmId, uint16_t id, size_t size, SimpleSeqFit)
|
||||||
: fSegment(SimpleSeqFitSegment(boost::interprocess::open_or_create,
|
: fSegment(SimpleSeqFitSegment(boost::interprocess::open_or_create,
|
||||||
std::string("fmq_" + shmId + "_m_" + std::to_string(id)).c_str(),
|
std::string("fmq_" + shmId + "_m_" + std::to_string(id)).c_str(),
|
||||||
|
@ -66,15 +68,12 @@ struct Segment
|
||||||
static void Register(const std::string& shmId, uint16_t id, AllocationAlgorithm allocAlgo)
|
static void Register(const std::string& shmId, uint16_t id, AllocationAlgorithm allocAlgo)
|
||||||
{
|
{
|
||||||
using namespace boost::interprocess;
|
using namespace boost::interprocess;
|
||||||
managed_shared_memory mngSegment(open_or_create, std::string("fmq_" + shmId + "_mng").c_str(), 6553600);
|
managed_shared_memory mngSegment(open_or_create, std::string("fmq_" + shmId + "_mng").c_str(), kManagementSegmentSize);
|
||||||
VoidAlloc alloc(mngSegment.get_segment_manager());
|
VoidAlloc alloc(mngSegment.get_segment_manager());
|
||||||
|
|
||||||
Uint16SegmentInfoHashMap* shmSegments = mngSegment.find_or_construct<Uint16SegmentInfoHashMap>(unique_instance)(alloc);
|
Uint16SegmentInfoHashMap* shmSegments = mngSegment.find_or_construct<Uint16SegmentInfoHashMap>(unique_instance)(alloc);
|
||||||
|
|
||||||
EventCounter* eventCounter = mngSegment.find<EventCounter>(unique_instance).first;
|
EventCounter* eventCounter = mngSegment.find_or_construct<EventCounter>(unique_instance)(0);
|
||||||
if (!eventCounter) {
|
|
||||||
eventCounter = mngSegment.construct<EventCounter>(unique_instance)(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool newSegmentRegistered = shmSegments->emplace(id, allocAlgo).second;
|
bool newSegmentRegistered = shmSegments->emplace(id, allocAlgo).second;
|
||||||
if (newSegmentRegistered) {
|
if (newSegmentRegistered) {
|
||||||
|
|
|
@ -41,6 +41,7 @@ struct UnmanagedRegion
|
||||||
{
|
{
|
||||||
friend class Message;
|
friend class Message;
|
||||||
friend class Manager;
|
friend class Manager;
|
||||||
|
friend class Monitor;
|
||||||
|
|
||||||
UnmanagedRegion(const std::string& shmId, uint16_t id, uint64_t size)
|
UnmanagedRegion(const std::string& shmId, uint16_t id, uint64_t size)
|
||||||
: UnmanagedRegion(shmId, size, false, makeRegionConfig(id))
|
: UnmanagedRegion(shmId, size, false, makeRegionConfig(id))
|
||||||
|
@ -50,6 +51,10 @@ struct UnmanagedRegion
|
||||||
: UnmanagedRegion(shmId, size, false, std::move(cfg))
|
: UnmanagedRegion(shmId, size, false, std::move(cfg))
|
||||||
{}
|
{}
|
||||||
|
|
||||||
|
UnmanagedRegion(const std::string& shmId, RegionConfig cfg)
|
||||||
|
: UnmanagedRegion(shmId, cfg.size, false, std::move(cfg))
|
||||||
|
{}
|
||||||
|
|
||||||
UnmanagedRegion(const std::string& shmId, uint64_t size, bool remote, RegionConfig cfg)
|
UnmanagedRegion(const std::string& shmId, uint64_t size, bool remote, RegionConfig cfg)
|
||||||
: fRemote(remote)
|
: fRemote(remote)
|
||||||
, fRemoveOnDestruction(cfg.removeOnDestruction)
|
, fRemoveOnDestruction(cfg.removeOnDestruction)
|
||||||
|
@ -66,6 +71,9 @@ struct UnmanagedRegion
|
||||||
{
|
{
|
||||||
using namespace boost::interprocess;
|
using namespace boost::interprocess;
|
||||||
|
|
||||||
|
// TODO: refactor this
|
||||||
|
cfg.size = size;
|
||||||
|
|
||||||
if (!cfg.path.empty()) {
|
if (!cfg.path.empty()) {
|
||||||
fName = std::string(cfg.path + fName);
|
fName = std::string(cfg.path + fName);
|
||||||
|
|
||||||
|
@ -119,7 +127,7 @@ struct UnmanagedRegion
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!remote) {
|
if (!remote) {
|
||||||
Register(shmId, cfg, size);
|
Register(shmId, cfg);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG(trace) << "shmem: initialized region: " << fName << " (" << (remote ? "remote" : "local") << ")";
|
LOG(trace) << "shmem: initialized region: " << fName << " (" << (remote ? "remote" : "local") << ")";
|
||||||
|
@ -223,20 +231,17 @@ struct UnmanagedRegion
|
||||||
return regionCfg;
|
return regionCfg;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void Register(const std::string& shmId, RegionConfig& cfg, uint64_t size)
|
static void Register(const std::string& shmId, const RegionConfig& cfg)
|
||||||
{
|
{
|
||||||
using namespace boost::interprocess;
|
using namespace boost::interprocess;
|
||||||
managed_shared_memory mngSegment(open_or_create, std::string("fmq_" + shmId + "_mng").c_str(), 6553600);
|
managed_shared_memory mngSegment(open_or_create, std::string("fmq_" + shmId + "_mng").c_str(), kManagementSegmentSize);
|
||||||
VoidAlloc alloc(mngSegment.get_segment_manager());
|
VoidAlloc alloc(mngSegment.get_segment_manager());
|
||||||
|
|
||||||
Uint16RegionInfoHashMap* shmRegions = mngSegment.find_or_construct<Uint16RegionInfoHashMap>(unique_instance)(alloc);
|
Uint16RegionInfoHashMap* shmRegions = mngSegment.find_or_construct<Uint16RegionInfoHashMap>(unique_instance)(alloc);
|
||||||
|
|
||||||
EventCounter* eventCounter = mngSegment.find<EventCounter>(unique_instance).first;
|
EventCounter* eventCounter = mngSegment.find_or_construct<EventCounter>(unique_instance)(0);
|
||||||
if (!eventCounter) {
|
|
||||||
eventCounter = mngSegment.construct<EventCounter>(unique_instance)(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool newShmRegionCreated = shmRegions->emplace(cfg.id.value(), RegionInfo(cfg.path.c_str(), cfg.creationFlags, cfg.userFlags, size, alloc)).second;
|
bool newShmRegionCreated = shmRegions->emplace(cfg.id.value(), RegionInfo(cfg.path.c_str(), cfg.creationFlags, cfg.userFlags, cfg.size, alloc)).second;
|
||||||
if (newShmRegionCreated) {
|
if (newShmRegionCreated) {
|
||||||
(eventCounter->fCount)++;
|
(eventCounter->fCount)++;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user