Shm: Provide segment/msg debug infos

This commit is contained in:
Alexey Rybalchenko 2020-08-07 16:29:53 +02:00
parent fe9b87e4e2
commit 70a583d08d
6 changed files with 129 additions and 4 deletions

View File

@ -12,13 +12,15 @@
#include <atomic> #include <atomic>
#include <string> #include <string>
#include <functional> // std::equal_to
#include <boost/interprocess/managed_shared_memory.hpp> #include <boost/functional/hash.hpp>
#include <boost/interprocess/allocators/allocator.hpp> #include <boost/interprocess/allocators/allocator.hpp>
#include <boost/interprocess/containers/map.hpp> #include <boost/interprocess/containers/map.hpp>
#include <boost/interprocess/containers/string.hpp> #include <boost/interprocess/containers/string.hpp>
#include <boost/interprocess/containers/vector.hpp> #include <boost/interprocess/containers/vector.hpp>
#include <boost/functional/hash.hpp> #include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/unordered_map.hpp>
#include <unistd.h> #include <unistd.h>
#include <sys/types.h> #include <sys/types.h>
@ -71,6 +73,15 @@ struct DeviceCounter
std::atomic<unsigned int> fCount; std::atomic<unsigned int> fCount;
}; };
/// Atomic counter of messages, kept as a unique instance in the management segment.
/// Manager finds or constructs it (initial value 0) and Monitor::CheckSegment reads it
/// to fill the "msgs" column.
struct MsgCounter
{
    /// @param c initial value of the counter
    MsgCounter(unsigned int c)
        : fCount{c}
    {}

    std::atomic<unsigned int> fCount; ///< current number of counted messages
};
struct RegionCounter struct RegionCounter
{ {
RegionCounter(uint64_t c) RegionCounter(uint64_t c)
@ -88,6 +99,23 @@ struct MetaHeader
boost::interprocess::managed_shared_memory::handle_t fHandle; boost::interprocess::managed_shared_memory::handle_t fHandle;
}; };
/// Debug record describing a single message allocated in the managed segment.
/// Instances are stored in the management segment, keyed by the message handle.
/// The member order is part of the shared-memory layout and must not be changed casually.
struct MsgDebug
{
    /// @param pid          id of the process that created the message
    /// @param size         size of the message buffer
    /// @param creationTime creation timestamp as a raw tick count
    ///                     (Monitor::PrintDebug interprets it as nanoseconds since the epoch)
    MsgDebug(pid_t pid, size_t size, const uint64_t creationTime)
        : fPid{pid}
        , fSize{size}
        , fCreationTime{creationTime}
    {}

    pid_t fPid;             ///< creator process id
    size_t fSize;           ///< message size
    uint64_t fCreationTime; ///< creation timestamp (raw tick count)
};
// Container types for the per-message debug records kept in the management segment.
// NOTE: despite the "Uint64" prefix, the key type is size_t (the message handle cast to size_t).

// Shared-memory allocator for the (handle, MsgDebug) pairs.
using Uint64MsgDebugPairAlloc = boost::interprocess::allocator<std::pair<const size_t, MsgDebug>, SegmentManager>;
// Hashed variant of the debug map; NOTE(review): not referenced by the code visible here — confirm whether it is still needed.
using Uint64MsgDebugHashMap = boost::unordered_map<size_t, MsgDebug, boost::hash<size_t>, std::equal_to<size_t>, Uint64MsgDebugPairAlloc>;
// Ordered debug map (handle -> MsgDebug); this is the type Manager and Monitor::PrintDebug use.
using Uint64MsgDebugMap = boost::interprocess::map<size_t, MsgDebug, std::less<size_t>, Uint64MsgDebugPairAlloc>;
struct RegionBlock struct RegionBlock
{ {
RegionBlock() RegionBlock()

View File

@ -71,7 +71,7 @@ class Manager
: fShmId(std::move(shmId)) : fShmId(std::move(shmId))
, fDeviceId(std::move(deviceId)) , fDeviceId(std::move(deviceId))
, fSegment(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_main").c_str(), size) , fSegment(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_main").c_str(), size)
, fManagementSegment(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mng").c_str(), 655360) , fManagementSegment(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mng").c_str(), 6553600)
, fShmVoidAlloc(fManagementSegment.get_segment_manager()) , fShmVoidAlloc(fManagementSegment.get_segment_manager())
, fShmMtx(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mtx").c_str()) , fShmMtx(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_mtx").c_str())
, fRegionEventsCV(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_cv").c_str()) , fRegionEventsCV(boost::interprocess::open_or_create, std::string("fmq_" + fShmId + "_cv").c_str())
@ -80,6 +80,8 @@ class Manager
, fRegionInfos(nullptr) , fRegionInfos(nullptr)
, fInterrupted(false) , fInterrupted(false)
, fMsgCounter(0) , fMsgCounter(0)
, fMsgDebug(nullptr)
, fShmMsgCounter(nullptr)
, fHeartbeatThread() , fHeartbeatThread()
, fSendHeartbeats(true) , fSendHeartbeats(true)
, fThrowOnBadAlloc(true) , fThrowOnBadAlloc(true)
@ -117,6 +119,7 @@ class Manager
} }
fRegionInfos = fManagementSegment.find_or_construct<Uint64RegionInfoMap>(unique_instance)(fShmVoidAlloc); fRegionInfos = fManagementSegment.find_or_construct<Uint64RegionInfoMap>(unique_instance)(fShmVoidAlloc);
fMsgDebug = fManagementSegment.find_or_construct<Uint64MsgDebugMap>(unique_instance)(fShmVoidAlloc);
// store info about the managed segment as region with id 0 // store info about the managed segment as region with id 0
fRegionInfos->emplace(0, RegionInfo("", 0, 0, fShmVoidAlloc)); fRegionInfos->emplace(0, RegionInfo("", 0, 0, fShmVoidAlloc));
@ -134,6 +137,16 @@ class Manager
LOG(debug) << "initialized device counter with: " << fDeviceCounter->fCount; LOG(debug) << "initialized device counter with: " << fDeviceCounter->fCount;
} }
fShmMsgCounter = fManagementSegment.find<MsgCounter>(unique_instance).first;
if (fShmMsgCounter) {
LOG(debug) << "message counter found, with value of " << fShmMsgCounter->fCount << ".";
} else {
LOG(debug) << "no message counter found, creating one and initializing with 0";
fShmMsgCounter = fManagementSegment.construct<MsgCounter>(unique_instance)(0);
LOG(debug) << "initialized message counter with: " << fShmMsgCounter->fCount;
}
fHeartbeatThread = std::thread(&Manager::SendHeartbeats, this); fHeartbeatThread = std::thread(&Manager::SendHeartbeats, this);
} }
@ -394,6 +407,21 @@ class Manager
void IncrementMsgCounter() { fMsgCounter.fetch_add(1, std::memory_order_relaxed); } void IncrementMsgCounter() { fMsgCounter.fetch_add(1, std::memory_order_relaxed); }
void DecrementMsgCounter() { fMsgCounter.fetch_sub(1, std::memory_order_relaxed); } void DecrementMsgCounter() { fMsgCounter.fetch_sub(1, std::memory_order_relaxed); }
/// Atomically adds one to the message counter that lives in the management segment.
void IncrementShmMsgCounter()
{
    fShmMsgCounter->fCount.fetch_add(1);
}
/// Atomically subtracts one from the message counter that lives in the management segment.
void DecrementShmMsgCounter()
{
    fShmMsgCounter->fCount.fetch_sub(1);
}
void AddMsgDebug(pid_t pid, size_t size, size_t handle, uint64_t time)
{
fMsgDebug->emplace(handle, MsgDebug(pid, size, time));
}
/// Drops the debug record stored for the given message handle, if there is one.
/// The visible caller (Message) holds the mutex returned by GetMtx() around this call.
/// @param handle segment handle of the message buffer whose record should be removed
void RemoveMsgDebug(size_t handle)
{
    auto entry = fMsgDebug->find(handle);
    if (entry != fMsgDebug->end()) {
        fMsgDebug->erase(entry);
    }
}
/// Returns the named mutex of this shm session (fShmMtx); Message locks it around the
/// shared message counter and debug map updates.
boost::interprocess::named_mutex& GetMtx() { return fShmMtx; }
void SendHeartbeats() void SendHeartbeats()
{ {
std::string controlQueueName("fmq_" + fShmId + "_cq"); std::string controlQueueName("fmq_" + fShmId + "_cq");
@ -473,6 +501,8 @@ class Manager
std::atomic<bool> fInterrupted; std::atomic<bool> fInterrupted;
std::atomic<int32_t> fMsgCounter; // TODO: find a better lifetime solution instead of the counter std::atomic<int32_t> fMsgCounter; // TODO: find a better lifetime solution instead of the counter
Uint64MsgDebugMap* fMsgDebug;
MsgCounter* fShmMsgCounter;
std::thread fHeartbeatThread; std::thread fHeartbeatThread;
bool fSendHeartbeats; bool fSendHeartbeats;

View File

@ -22,6 +22,9 @@
#include <cstddef> // size_t #include <cstddef> // size_t
#include <atomic> #include <atomic>
#include <sys/types.h> // getpid
#include <unistd.h> // pid_t
namespace fair namespace fair
{ {
namespace mq namespace mq
@ -274,6 +277,9 @@ class Message final : public fair::mq::Message
} }
} }
fMeta.fHandle = fManager.Segment().get_handle_from_address(fLocalPtr); fMeta.fHandle = fManager.Segment().get_handle_from_address(fLocalPtr);
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fManager.GetMtx());
fManager.IncrementShmMsgCounter();
fManager.AddMsgDebug(getpid(), size, static_cast<size_t>(fMeta.fHandle), std::chrono::system_clock::now().time_since_epoch().count());
} }
fMeta.fSize = size; fMeta.fSize = size;
@ -285,6 +291,9 @@ class Message final : public fair::mq::Message
if (fMeta.fHandle >= 0 && !fQueued) { if (fMeta.fHandle >= 0 && !fQueued) {
if (fMeta.fRegionId == 0) { if (fMeta.fRegionId == 0) {
fManager.Segment().deallocate(fManager.Segment().get_address_from_handle(fMeta.fHandle)); fManager.Segment().deallocate(fManager.Segment().get_address_from_handle(fMeta.fHandle));
boost::interprocess::scoped_lock<boost::interprocess::named_mutex> lock(fManager.GetMtx());
fManager.DecrementShmMsgCounter();
fManager.RemoveMsgDebug(fMeta.fHandle);
fMeta.fHandle = -1; fMeta.fHandle = -1;
} else { } else {
if (!fRegionPtr) { if (!fRegionPtr) {

View File

@ -225,6 +225,9 @@ void Monitor::Interactive()
case '\n': case '\n':
cout << "\n[\\n] --> invalid input." << endl; cout << "\n[\\n] --> invalid input." << endl;
break; break;
case 'b':
PrintDebug(ShmId{fShmId});
break;
default: default:
cout << "\n[" << c << "] --> invalid input." << endl; cout << "\n[" << c << "] --> invalid input." << endl;
break; break;
@ -284,12 +287,17 @@ void Monitor::CheckSegment()
fSeenOnce = true; fSeenOnce = true;
unsigned int numDevices = 0; unsigned int numDevices = 0;
unsigned int numMessages = 0;
if (fInteractive || fViewOnly) { if (fInteractive || fViewOnly) {
DeviceCounter* dc = managementSegment.find<DeviceCounter>(bipc::unique_instance).first; DeviceCounter* dc = managementSegment.find<DeviceCounter>(bipc::unique_instance).first;
if (dc) { if (dc) {
numDevices = dc->fCount; numDevices = dc->fCount;
} }
MsgCounter* mc = managementSegment.find<MsgCounter>(bipc::unique_instance).first;
if (mc) {
numMessages = mc->fCount;
}
} }
auto now = chrono::high_resolution_clock::now(); auto now = chrono::high_resolution_clock::now();
@ -311,17 +319,27 @@ void Monitor::CheckSegment()
<< setw(10) << segment.get_size() << " | " << setw(10) << segment.get_size() << " | "
<< setw(10) << segment.get_free_memory() << " | " << setw(10) << segment.get_free_memory() << " | "
<< setw(8) << numDevices << " | " << setw(8) << numDevices << " | "
<< setw(8) << numMessages << " | "
<< setw(10) << (fViewOnly ? "view only" : to_string(duration)) << " |" << setw(10) << (fViewOnly ? "view only" : to_string(duration)) << " |"
<< c << flush; << c << flush;
} else if (fViewOnly) { } else if (fViewOnly) {
size_t free = segment.get_free_memory(); size_t free = segment.get_free_memory();
size_t total = segment.get_size(); size_t total = segment.get_size();
size_t used = total - free; size_t used = total - free;
// size_t mfree = managementSegment.get_free_memory();
// size_t mtotal = managementSegment.get_size();
// size_t mused = mtotal - mfree;
LOGV(info, user1) << "[" << fSegmentName LOGV(info, user1) << "[" << fSegmentName
<< "] devices: " << numDevices << "] devices: " << numDevices
<< ", total: " << total << ", total: " << total
<< ", msgs: " << numMessages
<< ", free: " << free << ", free: " << free
<< ", used: " << used; << ", used: " << used;
// << "\n "
// << "[" << fManagementSegmentName
// << "] total: " << mtotal
// << ", free: " << mfree
// << ", used: " << mused;
} }
} catch (bie&) { } catch (bie&) {
fHeartbeatTriggered = false; fHeartbeatTriggered = false;
@ -331,6 +349,7 @@ void Monitor::CheckSegment()
<< setw(10) << "-" << " | " << setw(10) << "-" << " | "
<< setw(10) << "-" << " | " << setw(10) << "-" << " | "
<< setw(8) << "-" << " | " << setw(8) << "-" << " | "
<< setw(8) << "-" << " | "
<< setw(10) << "-" << " |" << setw(10) << "-" << " |"
<< c << flush; << c << flush;
} }
@ -356,6 +375,35 @@ void Monitor::CheckSegment()
} }
} }
/// Prints one line per message recorded in the debug map of the given shm session:
/// handle ("offset"), size, creator PID and creation time (local time, HH:MM:SS.microseconds).
///
/// Opens the management segment and the session mutex in open-only mode and holds the mutex
/// while iterating. If either cannot be opened, prints "no segment found"; if the segment
/// exists but contains no debug map, prints "no debug data found".
///
/// @param shmId id of the shm session to inspect
void Monitor::PrintDebug(const ShmId& shmId)
{
    string managementSegmentName("fmq_" + shmId.shmId + "_mng");
    try {
        bipc::managed_shared_memory managementSegment(bipc::open_only, managementSegmentName.c_str());
        boost::interprocess::named_mutex mtx(boost::interprocess::open_only, std::string("fmq_" + shmId.shmId + "_mtx").c_str());
        boost::interprocess::scoped_lock<bipc::named_mutex> lock(mtx);

        // find() yields a null pointer when the object does not exist in the segment
        Uint64MsgDebugMap* debug = managementSegment.find<Uint64MsgDebugMap>(bipc::unique_instance).first;
        if (!debug) {
            cout << "no debug data found" << endl;
            return;
        }

        cout << endl << "found " << debug->size() << " message(s):" << endl;

        for (const auto& e : *debug) {
            using time_point = std::chrono::system_clock::time_point;
            // the stored timestamp is interpreted as nanoseconds since the epoch
            time_point tmpt{std::chrono::duration_cast<time_point::duration>(std::chrono::nanoseconds(e.second.fCreationTime))};
            std::time_t t = std::chrono::system_clock::to_time_t(tmpt);
            // microseconds within the current second: drop the sub-microsecond digits first,
            // then keep the remainder below one second
            uint64_t us = (e.second.fCreationTime / 1000) % 1000000;
            auto tm = localtime(&t);
            cout << "offset: " << setw(12) << setfill(' ') << e.first
                 << ", size: " << setw(10) << setfill(' ') << e.second.fSize
                 << ", creator PID: " << e.second.fPid << setfill('0')
                 << ", at: " << setw(2) << tm->tm_hour << ":" << setw(2) << tm->tm_min << ":" << setw(2) << tm->tm_sec << "." << setw(6) << us << endl;
        }

        // restore the default fill character changed for the zero-padded time fields
        cout << setfill(' ');
    } catch (bie&) {
        cout << "no segment found" << endl;
    }
}
void Monitor::PrintQueues() void Monitor::PrintQueues()
{ {
cout << '\n'; cout << '\n';
@ -401,13 +449,14 @@ void Monitor::PrintHeader()
<< setw(10) << "size" << " | " << setw(10) << "size" << " | "
<< setw(10) << "free" << " | " << setw(10) << "free" << " | "
<< setw(8) << "devices" << " | " << setw(8) << "devices" << " | "
<< setw(8) << "msgs" << " | "
<< setw(10) << "last hb" << " |" << setw(10) << "last hb" << " |"
<< endl; << endl;
} }
void Monitor::PrintHelp() void Monitor::PrintHelp()
{ {
cout << "controls: [x] close memory, [p] print queues, [h] help, [q] quit." << endl; cout << "controls: [x] close memory, [p] print queues, [] print a list of allocated messages, [h] help, [q] quit." << endl;
} }
void Monitor::RemoveObject(const string& name) void Monitor::RemoveObject(const string& name)

View File

@ -60,6 +60,8 @@ class Monitor
/// @param sessionId session id /// @param sessionId session id
static void CleanupFull(const SessionId& sessionId); static void CleanupFull(const SessionId& sessionId);
static void PrintDebug(const ShmId& shmId);
static void RemoveObject(const std::string&); static void RemoveObject(const std::string&);
static void RemoveFileMapping(const std::string&); static void RemoveFileMapping(const std::string&);
static void RemoveQueue(const std::string&); static void RemoveQueue(const std::string&);

View File

@ -78,6 +78,7 @@ int main(int argc, char** argv)
unsigned int timeoutInMS = 5000; unsigned int timeoutInMS = 5000;
unsigned int intervalInMS = 100; unsigned int intervalInMS = 100;
bool runAsDaemon = false; bool runAsDaemon = false;
bool debug = false;
bool cleanOnExit = false; bool cleanOnExit = false;
options_description desc("Options"); options_description desc("Options");
@ -90,6 +91,7 @@ int main(int argc, char** argv)
("view,v" , value<bool>(&viewOnly)->implicit_value(true), "Run in view only mode") ("view,v" , value<bool>(&viewOnly)->implicit_value(true), "Run in view only mode")
("timeout,t" , value<unsigned int>(&timeoutInMS)->default_value(5000), "Heartbeat timeout in milliseconds") ("timeout,t" , value<unsigned int>(&timeoutInMS)->default_value(5000), "Heartbeat timeout in milliseconds")
("daemonize,d" , value<bool>(&runAsDaemon)->implicit_value(true), "Daemonize the monitor") ("daemonize,d" , value<bool>(&runAsDaemon)->implicit_value(true), "Daemonize the monitor")
("debug,b" , value<bool>(&debug)->implicit_value(true), "Debug - Print a list of messages)")
("clean-on-exit,e", value<bool>(&cleanOnExit)->implicit_value(true), "Perform cleanup on exit") ("clean-on-exit,e", value<bool>(&cleanOnExit)->implicit_value(true), "Perform cleanup on exit")
("interval" , value<unsigned int>(&intervalInMS)->default_value(100), "Output interval for interactive/view-only mode") ("interval" , value<unsigned int>(&intervalInMS)->default_value(100), "Output interval for interactive/view-only mode")
("help,h", "Print help"); ("help,h", "Print help");
@ -117,6 +119,11 @@ int main(int argc, char** argv)
return 0; return 0;
} }
if (debug) {
Monitor::PrintDebug(ShmId{shmId});
return 0;
}
cout << "Starting shared memory monitor for session: \"" << sessionName << "\" (shmId: " << shmId << ")..." << endl; cout << "Starting shared memory monitor for session: \"" << sessionName << "\" (shmId: " << shmId << ")..." << endl;
if (viewOnly && !interactive) { if (viewOnly && !interactive) {
cout << "running in non-interactive view-only mode, outputting with interval of " << intervalInMS << "ms. (change with --interval), press ctrl+C to exit." << endl; cout << "running in non-interactive view-only mode, outputting with interval of " << intervalInMS << "ms. (change with --interval), press ctrl+C to exit." << endl;