mirror of
https://github.com/FairRootGroup/FairMQ.git
synced 2025-10-13 16:46:47 +00:00
Shm monitor: fix startup race and enable view-only mode
This commit is contained in:
parent
2c6f436858
commit
2ac8f98178
|
@ -73,15 +73,6 @@ struct RegionCounter
|
||||||
std::atomic<uint64_t> fCount;
|
std::atomic<uint64_t> fCount;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct MonitorStatus
|
|
||||||
{
|
|
||||||
MonitorStatus()
|
|
||||||
: fActive(true)
|
|
||||||
{}
|
|
||||||
|
|
||||||
bool fActive;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct MetaHeader
|
struct MetaHeader
|
||||||
{
|
{
|
||||||
size_t fSize;
|
size_t fSize;
|
||||||
|
|
|
@ -77,11 +77,12 @@ FairMQTransportFactorySHM::FairMQTransportFactorySHM(const string& id, const fai
|
||||||
LOG(error) << "failed configuring context, reason: " << zmq_strerror(errno);
|
LOG(error) << "failed configuring context, reason: " << zmq_strerror(errno);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (autolaunchMonitor) {
|
||||||
|
Manager::StartMonitor(fShmId);
|
||||||
|
}
|
||||||
|
|
||||||
fManager = fair::mq::tools::make_unique<Manager>(fShmId, segmentSize);
|
fManager = fair::mq::tools::make_unique<Manager>(fShmId, segmentSize);
|
||||||
|
|
||||||
if (autolaunchMonitor) {
|
|
||||||
fManager->StartMonitor();
|
|
||||||
}
|
|
||||||
} catch (bipc::interprocess_exception& e) {
|
} catch (bipc::interprocess_exception& e) {
|
||||||
LOG(error) << "Could not initialize shared memory transport: " << e.what();
|
LOG(error) << "Could not initialize shared memory transport: " << e.what();
|
||||||
throw runtime_error(fair::mq::tools::ToString("Could not initialize shared memory transport: ", e.what()));
|
throw runtime_error(fair::mq::tools::ToString("Could not initialize shared memory transport: ", e.what()));
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
#include <boost/filesystem.hpp>
|
#include <boost/filesystem.hpp>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
using bie = ::boost::interprocess::interprocess_exception;
|
||||||
namespace bipc = ::boost::interprocess;
|
namespace bipc = ::boost::interprocess;
|
||||||
namespace bfs = ::boost::filesystem;
|
namespace bfs = ::boost::filesystem;
|
||||||
|
|
||||||
|
@ -53,57 +54,42 @@ Manager::Manager(const std::string& id, size_t size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bipc::managed_shared_memory& Manager::Segment()
|
void Manager::StartMonitor(const std::string& id)
|
||||||
{
|
|
||||||
return fSegment;
|
|
||||||
}
|
|
||||||
|
|
||||||
bipc::managed_shared_memory& Manager::ManagementSegment()
|
|
||||||
{
|
|
||||||
return fManagementSegment;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Manager::StartMonitor()
|
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
MonitorStatus* monitorStatus = fManagementSegment.find<MonitorStatus>(bipc::unique_instance).first;
|
bipc::named_mutex monitorStatus(bipc::open_only, string("fmq_" + id + "_ms").c_str());
|
||||||
if (monitorStatus == nullptr) {
|
LOG(debug) << "Found fairmq-shmmonitor for shared memory id " << id;
|
||||||
LOG(debug) << "no fairmq-shmmonitor found, starting...";
|
} catch (bie&) {
|
||||||
auto env = boost::this_process::environment();
|
LOG(debug) << "no fairmq-shmmonitor found for shared memory id " << id << ", starting...";
|
||||||
|
auto env = boost::this_process::environment();
|
||||||
|
|
||||||
vector<bfs::path> ownPath = boost::this_process::path();
|
vector<bfs::path> ownPath = boost::this_process::path();
|
||||||
|
|
||||||
if (const char* fmqp = getenv("FAIRMQ_PATH")) {
|
if (const char* fmqp = getenv("FAIRMQ_PATH")) {
|
||||||
ownPath.insert(ownPath.begin(), bfs::path(fmqp));
|
ownPath.insert(ownPath.begin(), bfs::path(fmqp));
|
||||||
}
|
}
|
||||||
|
|
||||||
bfs::path p = boost::process::search_path("fairmq-shmmonitor", ownPath);
|
bfs::path p = boost::process::search_path("fairmq-shmmonitor", ownPath);
|
||||||
|
|
||||||
if (!p.empty()) {
|
if (!p.empty()) {
|
||||||
boost::process::spawn(p, "-x", "--shmid", fShmId, "-d", "-t", "2000", env);
|
boost::process::spawn(p, "-x", "--shmid", id, "-d", "-t", "2000", env);
|
||||||
int numTries = 0;
|
int numTries = 0;
|
||||||
do {
|
do {
|
||||||
monitorStatus = fManagementSegment.find<MonitorStatus>(bipc::unique_instance).first;
|
try {
|
||||||
if (monitorStatus) {
|
bipc::named_mutex monitorStatus(bipc::open_only, string("fmq_" + id + "_ms").c_str());
|
||||||
LOG(debug) << "fairmq-shmmonitor started";
|
LOG(debug) << "Started fairmq-shmmonitor for shared memory id " << id;
|
||||||
break;
|
break;
|
||||||
} else {
|
} catch (bie&) {
|
||||||
this_thread::sleep_for(chrono::milliseconds(10));
|
this_thread::sleep_for(chrono::milliseconds(10));
|
||||||
if (++numTries > 1000) {
|
if (++numTries > 1000) {
|
||||||
LOG(error) << "Did not get response from fairmq-shmmonitor after " << 10 * 1000 << " milliseconds. Exiting.";
|
LOG(error) << "Did not get response from fairmq-shmmonitor after " << 10 * 1000 << " milliseconds. Exiting.";
|
||||||
throw runtime_error(fair::mq::tools::ToString("Did not get response from fairmq-shmmonitor after ", 10 * 1000, " milliseconds. Exiting."));
|
throw runtime_error(fair::mq::tools::ToString("Did not get response from fairmq-shmmonitor after ", 10 * 1000, " milliseconds. Exiting."));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} while (true);
|
} while (true);
|
||||||
} else {
|
} else {
|
||||||
LOG(warn) << "could not find fairmq-shmmonitor in the path";
|
LOG(warn) << "could not find fairmq-shmmonitor in the path";
|
||||||
}
|
|
||||||
} else {
|
|
||||||
LOG(debug) << "found fairmq-shmmonitor.";
|
|
||||||
}
|
}
|
||||||
} catch (std::exception& e) {
|
|
||||||
LOG(error) << "Exception during fairmq-shmmonitor initialization: " << e.what() << ", application will now exit";
|
|
||||||
exit(EXIT_FAILURE);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -174,7 +160,7 @@ Region* Manager::GetRemoteRegion(const uint64_t id)
|
||||||
|
|
||||||
auto r = fRegions.emplace(id, fair::mq::tools::make_unique<Region>(*this, id, 0, true, nullptr, path, flags));
|
auto r = fRegions.emplace(id, fair::mq::tools::make_unique<Region>(*this, id, 0, true, nullptr, path, flags));
|
||||||
return r.first->second.get();
|
return r.first->second.get();
|
||||||
} catch (bipc::interprocess_exception& e) {
|
} catch (bie& e) {
|
||||||
LOG(warn) << "Could not get remote region for id: " << id;
|
LOG(warn) << "Could not get remote region for id: " << id;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,10 +52,10 @@ class Manager
|
||||||
|
|
||||||
~Manager();
|
~Manager();
|
||||||
|
|
||||||
boost::interprocess::managed_shared_memory& Segment();
|
boost::interprocess::managed_shared_memory& Segment() { return fSegment; }
|
||||||
boost::interprocess::managed_shared_memory& ManagementSegment();
|
boost::interprocess::managed_shared_memory& ManagementSegment() { return fManagementSegment; }
|
||||||
|
|
||||||
void StartMonitor();
|
static void StartMonitor(const std::string&);
|
||||||
|
|
||||||
static void Interrupt();
|
static void Interrupt();
|
||||||
static void Resume();
|
static void Resume();
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include <poll.h>
|
#include <poll.h>
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
using bie = ::boost::interprocess::interprocess_exception;
|
||||||
namespace bipc = ::boost::interprocess;
|
namespace bipc = ::boost::interprocess;
|
||||||
namespace bpt = ::boost::posix_time;
|
namespace bpt = ::boost::posix_time;
|
||||||
|
|
||||||
|
@ -45,11 +46,12 @@ void signalHandler(int signal)
|
||||||
gSignalStatus = signal;
|
gSignalStatus = signal;
|
||||||
}
|
}
|
||||||
|
|
||||||
Monitor::Monitor(const string& shmId, bool selfDestruct, bool interactive, unsigned int timeoutInMS, bool runAsDaemon, bool cleanOnExit)
|
Monitor::Monitor(const string& shmId, bool selfDestruct, bool interactive, bool viewOnly, unsigned int timeoutInMS, bool runAsDaemon, bool cleanOnExit)
|
||||||
: fSelfDestruct(selfDestruct)
|
: fSelfDestruct(selfDestruct)
|
||||||
, fInteractive(interactive)
|
, fInteractive(interactive)
|
||||||
, fSeenOnce(false)
|
, fViewOnly(viewOnly)
|
||||||
, fIsDaemon(runAsDaemon)
|
, fIsDaemon(runAsDaemon)
|
||||||
|
, fSeenOnce(false)
|
||||||
, fCleanOnExit(cleanOnExit)
|
, fCleanOnExit(cleanOnExit)
|
||||||
, fTimeoutInMS(timeoutInMS)
|
, fTimeoutInMS(timeoutInMS)
|
||||||
, fShmId(shmId)
|
, fShmId(shmId)
|
||||||
|
@ -63,14 +65,14 @@ Monitor::Monitor(const string& shmId, bool selfDestruct, bool interactive, unsig
|
||||||
, fManagementSegment(bipc::open_or_create, fManagementSegmentName.c_str(), 65536)
|
, fManagementSegment(bipc::open_or_create, fManagementSegmentName.c_str(), 65536)
|
||||||
, fDeviceHeartbeats()
|
, fDeviceHeartbeats()
|
||||||
{
|
{
|
||||||
MonitorStatus* monitorStatus = fManagementSegment.find<MonitorStatus>(bipc::unique_instance).first;
|
if (!fViewOnly) {
|
||||||
if (monitorStatus != nullptr) {
|
try {
|
||||||
cout << "fairmq-shmmonitor already started or not properly exited. Try `fairmq-shmmonitor --cleanup`" << endl;
|
bipc::named_mutex monitorStatus(bipc::create_only, string("fmq_" + fShmId + "_ms").c_str());
|
||||||
exit(EXIT_FAILURE);
|
} catch (bie&) {
|
||||||
|
cout << "fairmq-shmmonitor for shared memory id " << fShmId << " already started or not properly exited. Try `fairmq-shmmonitor --cleanup --shmid " << fShmId << "`" << endl;
|
||||||
|
throw DaemonPresent(tools::ToString("fairmq-shmmonitor for shared memory id ", fShmId, " already started or not properly exited."));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
fManagementSegment.construct<MonitorStatus>(bipc::unique_instance)();
|
|
||||||
|
|
||||||
RemoveQueue(fControlQueueName);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Monitor::CatchSignals()
|
void Monitor::CatchSignals()
|
||||||
|
@ -97,7 +99,11 @@ void Monitor::SignalMonitor()
|
||||||
|
|
||||||
void Monitor::Run()
|
void Monitor::Run()
|
||||||
{
|
{
|
||||||
thread heartbeatThread(&Monitor::MonitorHeartbeats, this);
|
thread heartbeatThread;
|
||||||
|
if (!fViewOnly) {
|
||||||
|
RemoveQueue(fControlQueueName);
|
||||||
|
heartbeatThread = thread(&Monitor::MonitorHeartbeats, this);
|
||||||
|
}
|
||||||
|
|
||||||
if (fInteractive) {
|
if (fInteractive) {
|
||||||
Interactive();
|
Interactive();
|
||||||
|
@ -108,7 +114,9 @@ void Monitor::Run()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
heartbeatThread.join();
|
if (!fViewOnly) {
|
||||||
|
heartbeatThread.join();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Monitor::MonitorHeartbeats()
|
void Monitor::MonitorHeartbeats()
|
||||||
|
@ -131,13 +139,34 @@ void Monitor::MonitorHeartbeats()
|
||||||
// cout << "control queue timeout" << endl;
|
// cout << "control queue timeout" << endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (bipc::interprocess_exception& ie) {
|
} catch (bie& ie) {
|
||||||
cout << ie.what() << endl;
|
cout << ie.what() << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
RemoveQueue(fControlQueueName);
|
RemoveQueue(fControlQueueName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct TerminalConfig
|
||||||
|
{
|
||||||
|
TerminalConfig()
|
||||||
|
{
|
||||||
|
termios t;
|
||||||
|
tcgetattr(STDIN_FILENO, &t); // get the current terminal I/O structure
|
||||||
|
t.c_lflag &= ~ICANON; // disable canonical input
|
||||||
|
t.c_lflag &= ~ECHO; // do not echo input chars
|
||||||
|
tcsetattr(STDIN_FILENO, TCSANOW, &t); // apply the new settings
|
||||||
|
}
|
||||||
|
|
||||||
|
~TerminalConfig()
|
||||||
|
{
|
||||||
|
termios t;
|
||||||
|
tcgetattr(STDIN_FILENO, &t); // get the current terminal I/O structure
|
||||||
|
t.c_lflag |= ICANON; // re-enable canonical input
|
||||||
|
t.c_lflag |= ECHO; // echo input chars
|
||||||
|
tcsetattr(STDIN_FILENO, TCSANOW, &t); // apply the new settings
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
void Monitor::Interactive()
|
void Monitor::Interactive()
|
||||||
{
|
{
|
||||||
char c;
|
char c;
|
||||||
|
@ -145,11 +174,7 @@ void Monitor::Interactive()
|
||||||
cinfd[0].fd = fileno(stdin);
|
cinfd[0].fd = fileno(stdin);
|
||||||
cinfd[0].events = POLLIN;
|
cinfd[0].events = POLLIN;
|
||||||
|
|
||||||
struct termios t;
|
TerminalConfig tcfg;
|
||||||
tcgetattr(STDIN_FILENO, &t); // get the current terminal I/O structure
|
|
||||||
t.c_lflag &= ~ICANON; // disable canonical input
|
|
||||||
t.c_lflag &= ~ECHO; // do not echo input chars
|
|
||||||
tcsetattr(STDIN_FILENO, TCSANOW, &t); // apply the new settings
|
|
||||||
|
|
||||||
cout << endl;
|
cout << endl;
|
||||||
PrintHelp();
|
PrintHelp();
|
||||||
|
@ -175,7 +200,11 @@ void Monitor::Interactive()
|
||||||
break;
|
break;
|
||||||
case 'x':
|
case 'x':
|
||||||
cout << "\n[x] --> closing shared memory:" << endl;
|
cout << "\n[x] --> closing shared memory:" << endl;
|
||||||
Cleanup(fShmId);
|
if (!fViewOnly) {
|
||||||
|
Cleanup(fShmId);
|
||||||
|
} else {
|
||||||
|
cout << "cannot close because in view only mode" << endl;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case 'h':
|
case 'h':
|
||||||
cout << "\n[h] --> help:" << endl << endl;
|
cout << "\n[h] --> help:" << endl << endl;
|
||||||
|
@ -207,11 +236,6 @@ void Monitor::Interactive()
|
||||||
cout << "\r";
|
cout << "\r";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
tcgetattr(STDIN_FILENO, &t); // get the current terminal I/O structure
|
|
||||||
t.c_lflag |= ICANON; // re-enable canonical input
|
|
||||||
t.c_lflag |= ECHO; // echo input chars
|
|
||||||
tcsetattr(STDIN_FILENO, TCSANOW, &t); // apply the new settings
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Monitor::CheckSegment()
|
void Monitor::CheckSegment()
|
||||||
|
@ -250,9 +274,11 @@ void Monitor::CheckSegment()
|
||||||
|
|
||||||
unsigned int numDevices = 0;
|
unsigned int numDevices = 0;
|
||||||
|
|
||||||
fair::mq::shmem::DeviceCounter* dc = managementSegment.find<fair::mq::shmem::DeviceCounter>(bipc::unique_instance).first;
|
if (fInteractive) {
|
||||||
if (dc) {
|
fair::mq::shmem::DeviceCounter* dc = managementSegment.find<fair::mq::shmem::DeviceCounter>(bipc::unique_instance).first;
|
||||||
numDevices = dc->fCount;
|
if (dc) {
|
||||||
|
numDevices = dc->fCount;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto now = chrono::high_resolution_clock::now();
|
auto now = chrono::high_resolution_clock::now();
|
||||||
|
@ -270,31 +296,23 @@ void Monitor::CheckSegment()
|
||||||
|
|
||||||
if (fInteractive) {
|
if (fInteractive) {
|
||||||
cout << "| "
|
cout << "| "
|
||||||
<< setw(18) << fSegmentName << " | "
|
<< setw(18) << fSegmentName << " | "
|
||||||
<< setw(10) << segment.get_size() << " | "
|
<< setw(10) << segment.get_size() << " | "
|
||||||
<< setw(10) << segment.get_free_memory() << " | "
|
<< setw(10) << segment.get_free_memory() << " | "
|
||||||
// << setw(15) << segment.all_memory_deallocated() << " | "
|
<< setw(8) << numDevices << " | "
|
||||||
<< setw(2) << segment.check_sanity() << " | "
|
<< setw(10) << (fViewOnly ? "view only" : to_string(duration)) << " |"
|
||||||
// << setw(10) << segment.get_num_named_objects() << " | "
|
<< c << flush;
|
||||||
<< setw(10) << numDevices << " | "
|
|
||||||
// << setw(10) << segment.get_num_unique_objects() << " |"
|
|
||||||
<< setw(10) << duration << " |"
|
|
||||||
<< c
|
|
||||||
<< flush;
|
|
||||||
}
|
}
|
||||||
} catch (bipc::interprocess_exception& ie) {
|
} catch (bie&) {
|
||||||
fHeartbeatTriggered = false;
|
fHeartbeatTriggered = false;
|
||||||
if (fInteractive) {
|
if (fInteractive) {
|
||||||
cout << "| "
|
cout << "| "
|
||||||
<< setw(18) << "-" << " | "
|
<< setw(18) << "-" << " | "
|
||||||
<< setw(10) << "-" << " | "
|
<< setw(10) << "-" << " | "
|
||||||
<< setw(10) << "-" << " | "
|
<< setw(10) << "-" << " | "
|
||||||
// << setw(15) << "-" << " | "
|
<< setw(8) << "-" << " | "
|
||||||
<< setw(2) << "-" << " | "
|
<< setw(10) << "-" << " |"
|
||||||
<< setw(10) << "-" << " | "
|
<< c << flush;
|
||||||
<< setw(10) << "-" << " |"
|
|
||||||
<< c
|
|
||||||
<< flush;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto now = chrono::high_resolution_clock::now();
|
auto now = chrono::high_resolution_clock::now();
|
||||||
|
@ -318,50 +336,60 @@ void Monitor::CheckSegment()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Monitor::Cleanup(const string& shmId)
|
void Monitor::PrintQueues()
|
||||||
{
|
{
|
||||||
string managementSegmentName("fmq_" + shmId + "_mng");
|
cout << '\n';
|
||||||
|
|
||||||
try {
|
try {
|
||||||
bipc::managed_shared_memory managementSegment(bipc::open_only, managementSegmentName.c_str());
|
bipc::managed_shared_memory segment(bipc::open_only, fSegmentName.c_str());
|
||||||
RegionCounter* rc = managementSegment.find<RegionCounter>(bipc::unique_instance).first;
|
StrVector* queues = segment.find<StrVector>(string("fmq_" + fShmId + "_qs").c_str()).first;
|
||||||
if (rc) {
|
if (queues) {
|
||||||
cout << "Region counter found: " << rc->fCount << endl;
|
cout << "found " << queues->size() << " queue(s):" << endl;
|
||||||
uint64_t regionCount = rc->fCount;
|
|
||||||
|
|
||||||
Uint64RegionInfoMap* m = managementSegment.find<Uint64RegionInfoMap>(bipc::unique_instance).first;
|
for (const auto& queue : *queues) {
|
||||||
|
string name(queue.c_str());
|
||||||
for (uint64_t i = 1; i <= regionCount; ++i) {
|
cout << '\t' << name << " : ";
|
||||||
if (m != nullptr) {
|
atomic<int>* queueSize = segment.find<atomic<int>>(name.c_str()).first;
|
||||||
RegionInfo ri = m->at(i);
|
if (queueSize) {
|
||||||
string path = ri.fPath.c_str();
|
cout << *queueSize << " messages" << endl;
|
||||||
int flags = ri.fFlags;
|
|
||||||
cout << "Found RegionInfo with path: '" << path << "', flags: " << flags << "'." << endl;
|
|
||||||
if (path != "") {
|
|
||||||
RemoveFileMapping(tools::ToString(path, "fmq_" + shmId + "_rg_" + to_string(i)));
|
|
||||||
} else {
|
|
||||||
RemoveObject("fmq_" + shmId + "_rg_" + to_string(i));
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
RemoveObject("fmq_" + shmId + "_rg_" + to_string(i));
|
cout << "\tqueue does not have a queue size entry." << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
RemoveQueue(string("fmq_" + shmId + "_rgq_" + to_string(i)));
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
cout << "No region counter found. no regions to cleanup." << endl;
|
cout << "\tno queues found" << endl;
|
||||||
}
|
}
|
||||||
|
} catch (bie&) {
|
||||||
RemoveObject(managementSegmentName.c_str());
|
cout << "\tno queues found" << endl;
|
||||||
} catch (bipc::interprocess_exception& ie) {
|
} catch (out_of_range&) {
|
||||||
cout << "Did not find '" << managementSegmentName << "' shared memory segment. No regions to cleanup." << endl;
|
cout << "\tno queues found" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
RemoveObject("fmq_" + shmId + "_main");
|
cout << "\n --> last heartbeats: " << endl << endl;
|
||||||
RemoveMutex("fmq_" + shmId + "_mtx");
|
auto now = chrono::high_resolution_clock::now();
|
||||||
|
for (const auto& h : fDeviceHeartbeats) {
|
||||||
|
cout << "\t" << h.first << " : " << chrono::duration<double, milli>(now - h.second).count() << "ms ago." << endl;
|
||||||
|
}
|
||||||
|
|
||||||
cout << endl;
|
cout << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Monitor::PrintHeader()
|
||||||
|
{
|
||||||
|
cout << "| "
|
||||||
|
<< setw(18) << "name" << " | "
|
||||||
|
<< setw(10) << "size" << " | "
|
||||||
|
<< setw(10) << "free" << " | "
|
||||||
|
<< setw(8) << "devices" << " | "
|
||||||
|
<< setw(10) << "last hb" << " |"
|
||||||
|
<< endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Monitor::PrintHelp()
|
||||||
|
{
|
||||||
|
cout << "controls: [x] close memory, [p] print queues, [h] help, [q] quit." << endl;
|
||||||
|
}
|
||||||
|
|
||||||
void Monitor::RemoveObject(const string& name)
|
void Monitor::RemoveObject(const string& name)
|
||||||
{
|
{
|
||||||
if (bipc::shared_memory_object::remove(name.c_str())) {
|
if (bipc::shared_memory_object::remove(name.c_str())) {
|
||||||
|
@ -398,73 +426,61 @@ void Monitor::RemoveMutex(const string& name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Monitor::PrintQueues()
|
void Monitor::Cleanup(const string& shmId)
|
||||||
{
|
{
|
||||||
cout << '\n';
|
string managementSegmentName("fmq_" + shmId + "_mng");
|
||||||
|
|
||||||
try {
|
try {
|
||||||
bipc::managed_shared_memory segment(bipc::open_only, fSegmentName.c_str());
|
bipc::managed_shared_memory managementSegment(bipc::open_only, managementSegmentName.c_str());
|
||||||
StrVector* queues = segment.find<StrVector>(string("fmq_" + fShmId + "_qs").c_str()).first;
|
RegionCounter* rc = managementSegment.find<RegionCounter>(bipc::unique_instance).first;
|
||||||
if (queues) {
|
if (rc) {
|
||||||
cout << "found " << queues->size() << " queue(s):" << endl;
|
cout << "Region counter found: " << rc->fCount << endl;
|
||||||
|
uint64_t regionCount = rc->fCount;
|
||||||
|
|
||||||
for (const auto& queue : *queues) {
|
Uint64RegionInfoMap* m = managementSegment.find<Uint64RegionInfoMap>(bipc::unique_instance).first;
|
||||||
string name(queue.c_str());
|
|
||||||
cout << '\t' << name << " : ";
|
for (uint64_t i = 1; i <= regionCount; ++i) {
|
||||||
atomic<int>* queueSize = segment.find<atomic<int>>(name.c_str()).first;
|
if (m != nullptr) {
|
||||||
if (queueSize) {
|
RegionInfo ri = m->at(i);
|
||||||
cout << *queueSize << " messages" << endl;
|
string path = ri.fPath.c_str();
|
||||||
|
int flags = ri.fFlags;
|
||||||
|
cout << "Found RegionInfo with path: '" << path << "', flags: " << flags << "'." << endl;
|
||||||
|
if (path != "") {
|
||||||
|
RemoveFileMapping(tools::ToString(path, "fmq_" + shmId + "_rg_" + to_string(i)));
|
||||||
|
} else {
|
||||||
|
RemoveObject("fmq_" + shmId + "_rg_" + to_string(i));
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
cout << "\tqueue does not have a queue size entry." << endl;
|
RemoveObject("fmq_" + shmId + "_rg_" + to_string(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
RemoveQueue(string("fmq_" + shmId + "_rgq_" + to_string(i)));
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
cout << "\tno queues found" << endl;
|
cout << "No region counter found. no regions to cleanup." << endl;
|
||||||
}
|
}
|
||||||
} catch (bipc::interprocess_exception& ie) {
|
|
||||||
cout << "\tno queues found" << endl;
|
RemoveObject(managementSegmentName.c_str());
|
||||||
} catch (out_of_range& ie) {
|
} catch (bie&) {
|
||||||
cout << "\tno queues found" << endl;
|
cout << "Did not find '" << managementSegmentName << "' shared memory segment. No regions to cleanup." << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
cout << "\n --> last heartbeats: " << endl << endl;
|
RemoveObject("fmq_" + shmId + "_main");
|
||||||
auto now = chrono::high_resolution_clock::now();
|
RemoveMutex("fmq_" + shmId + "_mtx");
|
||||||
for (const auto& h : fDeviceHeartbeats) {
|
|
||||||
cout << "\t" << h.first << " : " << chrono::duration<double, milli>(now - h.second).count() << "ms ago." << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
cout << endl;
|
cout << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Monitor::PrintHeader()
|
|
||||||
{
|
|
||||||
cout << "| "
|
|
||||||
<< "\033[01;32m" << setw(18) << "name" << "\033[0m" << " | "
|
|
||||||
<< "\033[01;32m" << setw(10) << "size" << "\033[0m" << " | "
|
|
||||||
<< "\033[01;32m" << setw(10) << "free" << "\033[0m" << " | "
|
|
||||||
// << "\033[01;32m" << setw(15) << "all deallocated" << "\033[0m" << " | "
|
|
||||||
<< "\033[01;32m" << setw(2) << "ok" << "\033[0m" << " | "
|
|
||||||
// << "\033[01;32m" << setw(10) << "# named" << "\033[0m" << " | "
|
|
||||||
<< "\033[01;32m" << setw(10) << "# devices" << "\033[0m" << " | "
|
|
||||||
// << "\033[01;32m" << setw(10) << "# unique" << "\033[0m" << " |"
|
|
||||||
<< "\033[01;32m" << setw(10) << "ms since" << "\033[0m" << " |"
|
|
||||||
<< endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Monitor::PrintHelp()
|
|
||||||
{
|
|
||||||
cout << "controls: [x] close memory, [p] print queues, [h] help, [q] quit." << endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
Monitor::~Monitor()
|
Monitor::~Monitor()
|
||||||
{
|
{
|
||||||
fManagementSegment.destroy<MonitorStatus>(bipc::unique_instance);
|
|
||||||
if (fSignalThread.joinable()) {
|
if (fSignalThread.joinable()) {
|
||||||
fSignalThread.join();
|
fSignalThread.join();
|
||||||
}
|
}
|
||||||
if (fCleanOnExit) {
|
if (fCleanOnExit) {
|
||||||
Cleanup(fShmId);
|
Cleanup(fShmId);
|
||||||
}
|
}
|
||||||
|
if (!fViewOnly) {
|
||||||
|
RemoveMutex("fmq_" + fShmId + "_ms");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace shmem
|
} // namespace shmem
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <stdexcept>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
namespace fair
|
namespace fair
|
||||||
|
@ -26,22 +27,24 @@ namespace shmem
|
||||||
class Monitor
|
class Monitor
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
Monitor(const std::string& shmId, bool selfDestruct, bool interactive, unsigned int timeoutInMS, bool runAsDaemon, bool cleanOnExit);
|
Monitor(const std::string& shmId, bool selfDestruct, bool interactive, bool viewOnly, unsigned int timeoutInMS, bool runAsDaemon, bool cleanOnExit);
|
||||||
|
|
||||||
Monitor(const Monitor&) = delete;
|
Monitor(const Monitor&) = delete;
|
||||||
Monitor operator=(const Monitor&) = delete;
|
Monitor operator=(const Monitor&) = delete;
|
||||||
|
|
||||||
|
virtual ~Monitor();
|
||||||
|
|
||||||
void CatchSignals();
|
void CatchSignals();
|
||||||
void Run();
|
void Run();
|
||||||
|
|
||||||
virtual ~Monitor();
|
|
||||||
|
|
||||||
static void Cleanup(const std::string& shmId);
|
static void Cleanup(const std::string& shmId);
|
||||||
static void RemoveObject(const std::string&);
|
static void RemoveObject(const std::string&);
|
||||||
static void RemoveFileMapping(const std::string&);
|
static void RemoveFileMapping(const std::string&);
|
||||||
static void RemoveQueue(const std::string&);
|
static void RemoveQueue(const std::string&);
|
||||||
static void RemoveMutex(const std::string&);
|
static void RemoveMutex(const std::string&);
|
||||||
|
|
||||||
|
struct DaemonPresent : std::runtime_error { using std::runtime_error::runtime_error; };
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void PrintHeader();
|
void PrintHeader();
|
||||||
void PrintHelp();
|
void PrintHelp();
|
||||||
|
@ -53,8 +56,9 @@ class Monitor
|
||||||
|
|
||||||
bool fSelfDestruct; // will self-destruct after the memory has been closed
|
bool fSelfDestruct; // will self-destruct after the memory has been closed
|
||||||
bool fInteractive; // running in interactive mode
|
bool fInteractive; // running in interactive mode
|
||||||
bool fSeenOnce; // true is segment has been opened successfully at least once
|
bool fViewOnly; // view only mode
|
||||||
bool fIsDaemon;
|
bool fIsDaemon;
|
||||||
|
bool fSeenOnce; // true is segment has been opened successfully at least once
|
||||||
bool fCleanOnExit;
|
bool fCleanOnExit;
|
||||||
unsigned int fTimeoutInMS;
|
unsigned int fTimeoutInMS;
|
||||||
std::string fShmId;
|
std::string fShmId;
|
||||||
|
|
|
@ -32,5 +32,6 @@ FairMQ Shared Memory currently uses following names to register shared memory on
|
||||||
`fmq_<shmId>_mng` - management segment name, used for storing management data.
|
`fmq_<shmId>_mng` - management segment name, used for storing management data.
|
||||||
`fmq_<shmId>_cq` - message queue for communicating between shm transport and shm monitor (exists independent of above segments).
|
`fmq_<shmId>_cq` - message queue for communicating between shm transport and shm monitor (exists independent of above segments).
|
||||||
`fmq_<shmId>_mtx` - boost::interprocess::named_mutex for management purposes (exists independent of above segments).
|
`fmq_<shmId>_mtx` - boost::interprocess::named_mutex for management purposes (exists independent of above segments).
|
||||||
|
`fmq_<shmId>_ms` - shmmonitor status used to signal if it is active or not (exists independent of above segments).
|
||||||
`fmq_<shmId>_rg_<index>` - names of unmanaged regions.
|
`fmq_<shmId>_rg_<index>` - names of unmanaged regions.
|
||||||
`fmq_<shmId>_rgq_<index>` - names of queues for the unmanaged regions.
|
`fmq_<shmId>_rgq_<index>` - names of queues for the unmanaged regions.
|
||||||
|
|
|
@ -74,20 +74,22 @@ int main(int argc, char** argv)
|
||||||
bool cleanup = false;
|
bool cleanup = false;
|
||||||
bool selfDestruct = false;
|
bool selfDestruct = false;
|
||||||
bool interactive = false;
|
bool interactive = false;
|
||||||
|
bool viewOnly = false;
|
||||||
unsigned int timeoutInMS = 5000;
|
unsigned int timeoutInMS = 5000;
|
||||||
bool runAsDaemon = false;
|
bool runAsDaemon = false;
|
||||||
bool cleanOnExit = false;
|
bool cleanOnExit = false;
|
||||||
|
|
||||||
options_description desc("Options");
|
options_description desc("Options");
|
||||||
desc.add_options()
|
desc.add_options()
|
||||||
("session,s", value<string>(&sessionName)->default_value("default"), "session id which to monitor")
|
("session,s" , value<string>(&sessionName)->default_value("default"), "Session id")
|
||||||
("shmid", value<string>(&shmId)->default_value(""), "Shmem Id to monitor (if not provided, it is generated out of session id and user id)")
|
("shmid" , value<string>(&shmId)->default_value(""), "Shmem id (if not provided, it is generated out of session id and user id)")
|
||||||
("cleanup,c", value<bool>(&cleanup)->implicit_value(true), "Perform cleanup and quit")
|
("cleanup,c" , value<bool>(&cleanup)->implicit_value(true), "Perform cleanup and quit")
|
||||||
("self-destruct,x", value<bool>(&selfDestruct)->implicit_value(true), "Quit after first closing of the memory")
|
("self-destruct,x", value<bool>(&selfDestruct)->implicit_value(true), "Quit after first closing of the memory")
|
||||||
("interactive,i", value<bool>(&interactive)->implicit_value(true), "Interactive run")
|
("interactive,i" , value<bool>(&interactive)->implicit_value(true), "Interactive run")
|
||||||
("timeout,t", value<unsigned int>(&timeoutInMS)->default_value(5000), "Heartbeat timeout in milliseconds")
|
("view,v" , value<bool>(&viewOnly)->implicit_value(true), "Run in view only mode")
|
||||||
("daemonize,d", value<bool>(&runAsDaemon)->implicit_value(true), "Daemonize the monitor")
|
("timeout,t" , value<unsigned int>(&timeoutInMS)->default_value(5000), "Heartbeat timeout in milliseconds")
|
||||||
("clean-on-exit,e", value<bool>(&cleanOnExit)->implicit_value(true), "Perform cleanup on exit")
|
("daemonize,d" , value<bool>(&runAsDaemon)->implicit_value(true), "Daemonize the monitor")
|
||||||
|
("clean-on-exit,e", value<bool>(&cleanOnExit)->implicit_value(true), "Perform cleanup on exit")
|
||||||
("help,h", "Print help");
|
("help,h", "Print help");
|
||||||
|
|
||||||
variables_map vm;
|
variables_map vm;
|
||||||
|
@ -117,10 +119,12 @@ int main(int argc, char** argv)
|
||||||
|
|
||||||
cout << "Starting shared memory monitor for session: \"" << sessionName << "\" (shmId: " << shmId << ")..." << endl;
|
cout << "Starting shared memory monitor for session: \"" << sessionName << "\" (shmId: " << shmId << ")..." << endl;
|
||||||
|
|
||||||
Monitor monitor{shmId, selfDestruct, interactive, timeoutInMS, runAsDaemon, cleanOnExit};
|
Monitor monitor(shmId, selfDestruct, interactive, viewOnly, timeoutInMS, runAsDaemon, cleanOnExit);
|
||||||
|
|
||||||
monitor.CatchSignals();
|
monitor.CatchSignals();
|
||||||
monitor.Run();
|
monitor.Run();
|
||||||
|
} catch (Monitor::DaemonPresent& dp) {
|
||||||
|
return 0;
|
||||||
} catch (exception& e) {
|
} catch (exception& e) {
|
||||||
cerr << "Unhandled Exception reached the top of main: " << e.what() << ", application will now exit" << endl;
|
cerr << "Unhandled Exception reached the top of main: " << e.what() << ", application will now exit" << endl;
|
||||||
return 2;
|
return 2;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user