/******************************************************************************** * Copyright (C) 2014 GSI Helmholtzzentrum fuer Schwerionenforschung GmbH * * * * This software is distributed under the terms of the * * GNU Lesser General Public Licence (LGPL) version 3, * * copied verbatim in the file "LICENSE" * ********************************************************************************/ #include "Monitor.h" #include "Common.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace std; using bie = ::boost::interprocess::interprocess_exception; namespace bipc = ::boost::interprocess; namespace bpt = ::boost::posix_time; namespace { volatile sig_atomic_t gSignalStatus = 0; } namespace fair { namespace mq { namespace shmem { void signalHandler(int signal) { gSignalStatus = signal; } Monitor::Monitor(const string& shmId, bool selfDestruct, bool interactive, bool viewOnly, unsigned int timeoutInMS, unsigned int intervalInMS, bool runAsDaemon, bool cleanOnExit) : fSelfDestruct(selfDestruct) , fInteractive(interactive) , fViewOnly(viewOnly) , fIsDaemon(runAsDaemon) , fSeenOnce(false) , fCleanOnExit(cleanOnExit) , fTimeoutInMS(timeoutInMS) , fIntervalInMS(intervalInMS) , fShmId(shmId) , fSegmentName("fmq_" + fShmId + "_main") , fManagementSegmentName("fmq_" + fShmId + "_mng") , fControlQueueName("fmq_" + fShmId + "_cq") , fTerminating(false) , fHeartbeatTriggered(false) , fLastHeartbeat(chrono::high_resolution_clock::now()) , fSignalThread() , fDeviceHeartbeats() { if (!fViewOnly) { try { bipc::named_mutex monitorStatus(bipc::create_only, string("fmq_" + fShmId + "_ms").c_str()); } catch (bie&) { cout << "fairmq-shmmonitor for shared memory id " << fShmId << " already started or not properly exited. Try `fairmq-shmmonitor --cleanup --shmid " << fShmId << "`" << endl; throw DaemonPresent(tools::ToString("fairmq-shmmonitor for shared memory id ", fShmId, " already started or not properly exited.")); } } Logger::SetConsoleColor(false); Logger::DefineVerbosity(Verbosity::user1, VerbositySpec::Make(VerbositySpec::Info::timestamp_us)); Logger::SetVerbosity(Verbosity::verylow); } void Monitor::CatchSignals() { signal(SIGINT, signalHandler); signal(SIGTERM, signalHandler); fSignalThread = thread(&Monitor::SignalMonitor, this); } void Monitor::SignalMonitor() { while (true) { if (gSignalStatus != 0) { fTerminating = true; cout << "signal: " << gSignalStatus << endl; break; } else if (fTerminating) { break; } this_thread::sleep_for(chrono::milliseconds(100)); } } void Monitor::Run() { thread heartbeatThread; if (!fViewOnly) { RemoveQueue(fControlQueueName); heartbeatThread = thread(&Monitor::MonitorHeartbeats, this); } if (fInteractive) { Interactive(); } else { while (!fTerminating) { this_thread::sleep_for(chrono::milliseconds(fIntervalInMS)); CheckSegment(); } } if (!fViewOnly) { heartbeatThread.join(); } } void Monitor::MonitorHeartbeats() { try { bipc::message_queue mq(bipc::open_or_create, fControlQueueName.c_str(), 1000, 256); unsigned int priority; bipc::message_queue::size_type recvdSize; char msg[256] = {0}; while (!fTerminating) { bpt::ptime rcvTill = bpt::microsec_clock::universal_time() + bpt::milliseconds(100); if (mq.timed_receive(&msg, sizeof(msg), recvdSize, priority, rcvTill)) { fHeartbeatTriggered = true; fLastHeartbeat = chrono::high_resolution_clock::now(); string deviceId(msg, recvdSize); fDeviceHeartbeats[deviceId] = fLastHeartbeat; } else { // cout << "control queue timeout" << endl; } } } catch (bie& ie) { cout << ie.what() << endl; } RemoveQueue(fControlQueueName); } struct TerminalConfig { TerminalConfig() { termios t; tcgetattr(STDIN_FILENO, &t); // get the current terminal I/O structure t.c_lflag &= ~ICANON; // disable canonical input t.c_lflag &= ~ECHO; // do not echo input chars tcsetattr(STDIN_FILENO, TCSANOW, &t); // apply the new settings } ~TerminalConfig() { termios t; tcgetattr(STDIN_FILENO, &t); // get the current terminal I/O structure t.c_lflag |= ICANON; // re-enable canonical input t.c_lflag |= ECHO; // echo input chars tcsetattr(STDIN_FILENO, TCSANOW, &t); // apply the new settings } }; void Monitor::Interactive() { char c; pollfd cinfd[1]; cinfd[0].fd = fileno(stdin); cinfd[0].events = POLLIN; TerminalConfig tcfg; cout << endl; PrintHelp(); cout << endl; PrintHeader(); while (!fTerminating) { if (poll(cinfd, 1, fIntervalInMS)) { if (fTerminating || gSignalStatus != 0) { break; } c = getchar(); switch (c) { case 'q': cout << "\n[q] --> quitting." << endl; fTerminating = true; break; case 'p': cout << "\n[p] --> active queues:" << endl; PrintQueues(); break; case 'x': cout << "\n[x] --> closing shared memory:" << endl; if (!fViewOnly) { Cleanup(ShmId{fShmId}); } else { cout << "cannot close because in view only mode" << endl; } break; case 'h': cout << "\n[h] --> help:" << endl << endl; PrintHelp(); cout << endl; break; case '\n': cout << "\n[\\n] --> invalid input." << endl; break; case 'b': PrintDebug(ShmId{fShmId}); break; default: cout << "\n[" << c << "] --> invalid input." << endl; break; } if (fTerminating) { break; } PrintHeader(); } if (fTerminating) { break; } CheckSegment(); if (!fTerminating) { cout << "\r"; } } } void Monitor::CheckSegment() { char c = '#'; if (fInteractive) { static uint64_t counter = 0; int mod = counter++ % 5; switch (mod) { case 0: c = '-'; break; case 1: c = '\\'; break; case 2: c = '|'; break; case 3: c = '-'; break; case 4: c = '/'; break; default: break; } } try { bipc::managed_shared_memory segment(bipc::open_only, fSegmentName.c_str()); bipc::managed_shared_memory managementSegment(bipc::open_only, fManagementSegmentName.c_str()); fSeenOnce = true; unsigned int numDevices = 0; #ifdef FAIRMQ_DEBUG_MODE unsigned int numMessages = 0; #endif if (fInteractive || fViewOnly) { DeviceCounter* dc = managementSegment.find(bipc::unique_instance).first; if (dc) { numDevices = dc->fCount; } #ifdef FAIRMQ_DEBUG_MODE MsgCounter* mc = managementSegment.find(bipc::unique_instance).first; if (mc) { numMessages = mc->fCount; } #endif } auto now = chrono::high_resolution_clock::now(); unsigned int duration = chrono::duration_cast(now - fLastHeartbeat).count(); if (fHeartbeatTriggered && duration > fTimeoutInMS) { cout << "no heartbeats since over " << fTimeoutInMS << " milliseconds, cleaning..." << endl; Cleanup(ShmId{fShmId}); fHeartbeatTriggered = false; if (fSelfDestruct) { cout << "\nself destructing" << endl; fTerminating = true; } } if (fInteractive) { cout << "| " << setw(18) << fSegmentName << " | " << setw(10) << segment.get_size() << " | " << setw(10) << segment.get_free_memory() << " | " << setw(8) << numDevices << " | " #ifdef FAIRMQ_DEBUG_MODE << setw(8) << numMessages << " | " #else << setw(8) << "nodebug" << " | " #endif << setw(10) << (fViewOnly ? "view only" : to_string(duration)) << " |" << c << flush; } else if (fViewOnly) { size_t free = segment.get_free_memory(); size_t total = segment.get_size(); size_t used = total - free; // size_t mfree = managementSegment.get_free_memory(); // size_t mtotal = managementSegment.get_size(); // size_t mused = mtotal - mfree; LOGV(info, user1) << "[" << fSegmentName << "] devices: " << numDevices << ", total: " << total #ifdef FAIRMQ_DEBUG_MODE << ", msgs: " << numMessages #else << ", msgs: NODEBUG" #endif << ", free: " << free << ", used: " << used; // << "\n " // << "[" << fManagementSegmentName // << "] total: " << mtotal // << ", free: " << mfree // << ", used: " << mused; } } catch (bie&) { fHeartbeatTriggered = false; if (fInteractive) { cout << "| " << setw(18) << "-" << " | " << setw(10) << "-" << " | " << setw(10) << "-" << " | " << setw(8) << "-" << " | " << setw(8) << "-" << " | " << setw(10) << "-" << " |" << c << flush; } auto now = chrono::high_resolution_clock::now(); unsigned int duration = chrono::duration_cast(now - fLastHeartbeat).count(); if (fIsDaemon && duration > fTimeoutInMS * 2) { Cleanup(ShmId{fShmId}); fHeartbeatTriggered = false; if (fSelfDestruct) { cout << "\nself destructing" << endl; fTerminating = true; } } if (fSelfDestruct) { if (fSeenOnce) { cout << "self destructing" << endl; fTerminating = true; } } } } void Monitor::PrintDebug(const ShmId& shmId __attribute__((unused))) { #ifdef FAIRMQ_DEBUG_MODE string managementSegmentName("fmq_" + shmId.shmId + "_mng"); try { bipc::managed_shared_memory managementSegment(bipc::open_only, managementSegmentName.c_str()); boost::interprocess::named_mutex mtx(boost::interprocess::open_only, std::string("fmq_" + shmId.shmId + "_mtx").c_str()); boost::interprocess::scoped_lock lock(mtx); Uint64MsgDebugMap* debug = managementSegment.find(bipc::unique_instance).first; cout << endl << "found " << debug->size() << " message(s):" << endl; for (const auto& e : *debug) { using time_point = std::chrono::system_clock::time_point; time_point tmpt{std::chrono::duration_cast(std::chrono::nanoseconds(e.second.fCreationTime))}; std::time_t t = std::chrono::system_clock::to_time_t(tmpt); uint64_t ms = e.second.fCreationTime % 1000000; auto tm = localtime(&t); cout << "offset: " << setw(12) << setfill(' ') << e.first << ", size: " << setw(10) << setfill(' ') << e.second.fSize << ", creator PID: " << e.second.fPid << setfill('0') << ", at: " << setw(2) << tm->tm_hour << ":" << setw(2) << tm->tm_min << ":" << setw(2) << tm->tm_sec << "." << setw(6) << ms << endl; } cout << setfill(' '); } catch (bie&) { cout << "no segment found" << endl; } #else cout << "FairMQ was not compiled in debug mode (FAIRMQ_DEBUG_MODE)" << endl; #endif } void Monitor::PrintQueues() { cout << '\n'; try { bipc::managed_shared_memory segment(bipc::open_only, fSegmentName.c_str()); StrVector* queues = segment.find(string("fmq_" + fShmId + "_qs").c_str()).first; if (queues) { cout << "found " << queues->size() << " queue(s):" << endl; for (const auto& queue : *queues) { string name(queue.c_str()); cout << '\t' << name << " : "; atomic* queueSize = segment.find>(name.c_str()).first; if (queueSize) { cout << *queueSize << " messages" << endl; } else { cout << "\tqueue does not have a queue size entry." << endl; } } } else { cout << "\tno queues found" << endl; } } catch (bie&) { cout << "\tno queues found" << endl; } catch (out_of_range&) { cout << "\tno queues found" << endl; } cout << "\n --> last heartbeats: " << endl << endl; auto now = chrono::high_resolution_clock::now(); for (const auto& h : fDeviceHeartbeats) { cout << "\t" << h.first << " : " << chrono::duration(now - h.second).count() << "ms ago." << endl; } cout << endl; } void Monitor::PrintHeader() { cout << "| " << setw(18) << "name" << " | " << setw(10) << "size" << " | " << setw(10) << "free" << " | " << setw(8) << "devices" << " | " << setw(8) << "msgs" << " | " << setw(10) << "last hb" << " |" << endl; } void Monitor::PrintHelp() { cout << "controls: [x] close memory, " << "[p] print queues, " << "[b] print a list of allocated messages (only available when compiled with FAIMQ_DEBUG_MODE=ON), " << "[h] help, " << "[q] quit." << endl; } void Monitor::RemoveObject(const string& name) { if (bipc::shared_memory_object::remove(name.c_str())) { cout << "Successfully removed '" << name << "'." << endl; } else { cout << "Did not remove '" << name << "'. Already removed?" << endl; } } void Monitor::RemoveFileMapping(const string& name) { if (bipc::file_mapping::remove(name.c_str())) { cout << "Successfully removed '" << name << "'." << endl; } else { cout << "Did not remove '" << name << "'. Already removed?" << endl; } } void Monitor::RemoveQueue(const string& name) { if (bipc::message_queue::remove(name.c_str())) { cout << "Successfully removed '" << name << "'." << endl; } else { cout << "Did not remove '" << name << "'. Already removed?" << endl; } } void Monitor::RemoveMutex(const string& name) { if (bipc::named_mutex::remove(name.c_str())) { cout << "Successfully removed '" << name << "'." << endl; } else { cout << "Did not remove '" << name << "'. Already removed?" << endl; } } void Monitor::RemoveCondition(const string& name) { if (bipc::named_condition::remove(name.c_str())) { cout << "Successfully removed '" << name << "'." << endl; } else { cout << "Did not remove '" << name << "'. Already removed?" << endl; } } void Monitor::Cleanup(const ShmId& shmId) { cout << "Cleaning up for shared memory id '" << shmId.shmId << "'..." << endl; string managementSegmentName("fmq_" + shmId.shmId + "_mng"); try { bipc::managed_shared_memory managementSegment(bipc::open_only, managementSegmentName.c_str()); RegionCounter* rc = managementSegment.find(bipc::unique_instance).first; if (rc) { cout << "Region counter found: " << rc->fCount << endl; uint64_t regionCount = rc->fCount; Uint64RegionInfoMap* m = managementSegment.find(bipc::unique_instance).first; for (uint64_t i = 1; i <= regionCount; ++i) { if (m != nullptr) { RegionInfo ri = m->at(i); string path = ri.fPath.c_str(); int flags = ri.fFlags; cout << "Found RegionInfo with path: '" << path << "', flags: " << flags << ", fDestroyed: " << ri.fDestroyed << "." << endl; if (path != "") { RemoveFileMapping(tools::ToString(path, "fmq_" + shmId.shmId + "_rg_" + to_string(i))); } else { RemoveObject("fmq_" + shmId.shmId + "_rg_" + to_string(i)); } } else { RemoveObject("fmq_" + shmId.shmId + "_rg_" + to_string(i)); } RemoveQueue(string("fmq_" + shmId.shmId + "_rgq_" + to_string(i))); } } else { cout << "No region counter found. No regions to cleanup." << endl; } RemoveObject(managementSegmentName.c_str()); } catch (bie&) { cout << "Did not find '" << managementSegmentName << "' shared memory segment. No regions to cleanup." << endl; } catch(out_of_range& oor) { cout << "Could not locate element in the region map, out of range: " << oor.what() << endl; } RemoveObject("fmq_" + shmId.shmId + "_main"); RemoveMutex("fmq_" + shmId.shmId + "_mtx"); RemoveCondition("fmq_" + shmId.shmId + "_cv"); cout << endl; } void Monitor::Cleanup(const SessionId& sessionId) { ShmId shmId{buildShmIdFromSessionIdAndUserId(sessionId.sessionId)}; cout << "Cleanup called with session id '" << sessionId.sessionId << "', translating to shared memory id '" << shmId.shmId << "'" << endl; Cleanup(shmId); } void Monitor::CleanupFull(const ShmId& shmId) { Cleanup(shmId); RemoveMutex("fmq_" + shmId.shmId + "_ms"); RemoveQueue("fmq_" + shmId.shmId + "_cq"); } void Monitor::CleanupFull(const SessionId& sessionId) { ShmId shmId{buildShmIdFromSessionIdAndUserId(sessionId.sessionId)}; cout << "Cleanup called with session id '" << sessionId.sessionId << "', translating to shared memory id '" << shmId.shmId << "'" << endl; CleanupFull(shmId); } Monitor::~Monitor() { if (fSignalThread.joinable()) { fSignalThread.join(); } if (fCleanOnExit) { Cleanup(ShmId{fShmId}); } if (!fViewOnly) { RemoveMutex("fmq_" + fShmId + "_ms"); } } } // namespace shmem } // namespace mq } // namespace fair