Shm: throw on bad_alloc, option to disable

This commit is contained in:
Alexey Rybalchenko 2020-05-17 14:46:15 +02:00
parent 2916a491b9
commit 0d03c76a75
7 changed files with 42 additions and 29 deletions

View File

@ -21,7 +21,7 @@ class FairMQMessage
{ {
public: public:
FairMQMessage() = default; FairMQMessage() = default;
FairMQMessage(FairMQTransportFactory* factory):fTransport{factory} {} FairMQMessage(FairMQTransportFactory* factory) : fTransport(factory) {}
virtual void Rebuild() = 0; virtual void Rebuild() = 0;
virtual void Rebuild(const size_t size) = 0; virtual void Rebuild(const size_t size) = 0;
virtual void Rebuild(void* data, const size_t size, fairmq_free_fn* ffn, void* hint = nullptr) = 0; virtual void Rebuild(void* data, const size_t size, fairmq_free_fn* ffn, void* hint = nullptr) = 0;
@ -33,7 +33,7 @@ class FairMQMessage
virtual fair::mq::Transport GetType() const = 0; virtual fair::mq::Transport GetType() const = 0;
FairMQTransportFactory* GetTransport() { return fTransport; } FairMQTransportFactory* GetTransport() { return fTransport; }
//void SetTransport(FairMQTransportFactory* transport) { fTransport = transport; } void SetTransport(FairMQTransportFactory* transport) { fTransport = transport; }
virtual void Copy(const FairMQMessage& msg) = 0; virtual void Copy(const FairMQMessage& msg) = 0;
@ -53,6 +53,7 @@ namespace mq
using Message = FairMQMessage; using Message = FairMQMessage;
using MessagePtr = FairMQMessagePtr; using MessagePtr = FairMQMessagePtr;
struct MessageError : std::runtime_error { using std::runtime_error::runtime_error; }; struct MessageError : std::runtime_error { using std::runtime_error::runtime_error; };
// Error type signalling that a message could not be allocated.
// Constructible from the same arguments as std::runtime_error.
struct MessageBadAlloc : std::runtime_error
{
    using std::runtime_error::runtime_error;
};
} /* namespace mq */ } /* namespace mq */
} /* namespace fair */ } /* namespace fair */

View File

@ -69,6 +69,7 @@ Plugin::ProgOptions ConfigPluginProgramOptions()
("max-run-time", po::value<uint64_t >()->default_value(0), "Maximum runtime for the Running state handler, after which state will change to Ready (in seconds, 0 for no limit).") ("max-run-time", po::value<uint64_t >()->default_value(0), "Maximum runtime for the Running state handler, after which state will change to Ready (in seconds, 0 for no limit).")
("print-channels", po::value<bool >()->implicit_value(true), "Print registered channel endpoints in a machine-readable format (<channel name>:<min num subchannels>:<max num subchannels>)") ("print-channels", po::value<bool >()->implicit_value(true), "Print registered channel endpoints in a machine-readable format (<channel name>:<min num subchannels>:<max num subchannels>)")
("shm-segment-size", po::value<size_t >()->default_value(2000000000), "Shared memory: size of the shared memory segment (in bytes).") ("shm-segment-size", po::value<size_t >()->default_value(2000000000), "Shared memory: size of the shared memory segment (in bytes).")
("shm-throw-bad-alloc", po::value<bool >()->default_value(true), "Throw a fair::mq::MessageBadAlloc if cannot allocate a message (retry if false).")
("shm-monitor", po::value<bool >()->default_value(true), "Shared memory: run monitor daemon.") ("shm-monitor", po::value<bool >()->default_value(true), "Shared memory: run monitor daemon.")
("ofi-size-hint", po::value<size_t >()->default_value(0), "EXPERIMENTAL: OFI size hint for the allocator.") ("ofi-size-hint", po::value<size_t >()->default_value(0), "EXPERIMENTAL: OFI size hint for the allocator.")
("rate", po::value<float >()->default_value(0.), "Rate for conditional run loop (Hz).") ("rate", po::value<float >()->default_value(0.), "Rate for conditional run loop (Hz).")

View File

@ -70,6 +70,7 @@ SAMPLER+=" --severity debug"
SAMPLER+=" --msg-size $msgSize" SAMPLER+=" --msg-size $msgSize"
SAMPLER+=" --multipart $multipart" SAMPLER+=" --multipart $multipart"
SAMPLER+=" --num-parts $numParts" SAMPLER+=" --num-parts $numParts"
SAMPLER+=" --shm-throw-bad-alloc false"
# SAMPLER+=" --msg-rate 1000" # SAMPLER+=" --msg-rate 1000"
SAMPLER+=" --max-iterations $maxIterations" SAMPLER+=" --max-iterations $maxIterations"
SAMPLER+=" --channel-config name=data,type=pair,method=bind,address=tcp://127.0.0.1:5555" SAMPLER+=" --channel-config name=data,type=pair,method=bind,address=tcp://127.0.0.1:5555"

View File

@ -52,7 +52,7 @@ struct SharedMemoryError : std::runtime_error { using std::runtime_error::runtim
class Manager class Manager
{ {
public: public:
Manager(std::string id, std::string deviceId, size_t size) Manager(std::string id, std::string deviceId, size_t size, bool throwOnBadAlloc)
: fShmId(std::move(id)) : fShmId(std::move(id))
, fDeviceId(std::move(deviceId)) , fDeviceId(std::move(deviceId))
, fSegmentName("fmq_" + fShmId + "_main") , fSegmentName("fmq_" + fShmId + "_main")
@ -69,6 +69,7 @@ class Manager
, fMsgCounter(0) , fMsgCounter(0)
, fHeartbeatThread() , fHeartbeatThread()
, fSendHeartbeats(true) , fSendHeartbeats(true)
, fThrowOnBadAlloc(throwOnBadAlloc)
{ {
using namespace boost::interprocess; using namespace boost::interprocess;
LOG(debug) << "created/opened shared memory segment '" << "fmq_" << fShmId << "_main" << "' of " << size << " bytes. Available are " << fSegment.get_free_memory() << " bytes."; LOG(debug) << "created/opened shared memory segment '" << "fmq_" << fShmId << "_main" << "' of " << size << " bytes. Available are " << fSegment.get_free_memory() << " bytes.";
@ -415,6 +416,8 @@ class Manager
} }
} }
// Reports the throw-on-bad-alloc setting this manager was constructed with.
bool ThrowingOnBadAlloc() const
{
    return fThrowOnBadAlloc;
}
private: private:
std::string fShmId; std::string fShmId;
std::string fDeviceId; std::string fDeviceId;
@ -440,6 +443,7 @@ class Manager
std::thread fHeartbeatThread; std::thread fHeartbeatThread;
std::atomic<bool> fSendHeartbeats; std::atomic<bool> fSendHeartbeats;
bool fThrowOnBadAlloc;
}; };
} // namespace shmem } // namespace shmem

View File

@ -12,7 +12,7 @@
#include "Manager.h" #include "Manager.h"
#include "Region.h" #include "Region.h"
#include "UnmanagedRegion.h" #include "UnmanagedRegion.h"
#include <fairmq/Tools.h>
#include <FairMQLogger.h> #include <FairMQLogger.h>
#include <FairMQMessage.h> #include <FairMQMessage.h>
#include <FairMQUnmanagedRegion.h> #include <FairMQUnmanagedRegion.h>
@ -37,7 +37,7 @@ class Message final : public fair::mq::Message
public: public:
Message(Manager& manager, FairMQTransportFactory* factory = nullptr) Message(Manager& manager, FairMQTransportFactory* factory = nullptr)
: fair::mq::Message{factory} : fair::mq::Message(factory)
, fManager(manager) , fManager(manager)
, fQueued(false) , fQueued(false)
, fMeta{0, 0, 0, -1} , fMeta{0, 0, 0, -1}
@ -48,7 +48,7 @@ class Message final : public fair::mq::Message
} }
Message(Manager& manager, const size_t size, FairMQTransportFactory* factory = nullptr) Message(Manager& manager, const size_t size, FairMQTransportFactory* factory = nullptr)
: fair::mq::Message{factory} : fair::mq::Message(factory)
, fManager(manager) , fManager(manager)
, fQueued(false) , fQueued(false)
, fMeta{0, 0, 0, -1} , fMeta{0, 0, 0, -1}
@ -60,7 +60,7 @@ class Message final : public fair::mq::Message
} }
Message(Manager& manager, void* data, const size_t size, fairmq_free_fn* ffn, void* hint = nullptr, FairMQTransportFactory* factory = nullptr) Message(Manager& manager, void* data, const size_t size, fairmq_free_fn* ffn, void* hint = nullptr, FairMQTransportFactory* factory = nullptr)
: fair::mq::Message{factory} : fair::mq::Message(factory)
, fManager(manager) , fManager(manager)
, fQueued(false) , fQueued(false)
, fMeta{0, 0, 0, -1} , fMeta{0, 0, 0, -1}
@ -79,7 +79,7 @@ class Message final : public fair::mq::Message
} }
Message(Manager& manager, UnmanagedRegionPtr& region, void* data, const size_t size, void* hint = 0, FairMQTransportFactory* factory = nullptr) Message(Manager& manager, UnmanagedRegionPtr& region, void* data, const size_t size, void* hint = 0, FairMQTransportFactory* factory = nullptr)
: fair::mq::Message{factory} : fair::mq::Message(factory)
, fManager(manager) , fManager(manager)
, fQueued(false) , fQueued(false)
, fMeta{size, static_cast<UnmanagedRegion*>(region.get())->fRegionId, reinterpret_cast<size_t>(hint), -1} , fMeta{size, static_cast<UnmanagedRegion*>(region.get())->fRegionId, reinterpret_cast<size_t>(hint), -1}
@ -97,7 +97,7 @@ class Message final : public fair::mq::Message
} }
Message(Manager& manager, MetaHeader& hdr, FairMQTransportFactory* factory = nullptr) Message(Manager& manager, MetaHeader& hdr, FairMQTransportFactory* factory = nullptr)
: fair::mq::Message{factory} : fair::mq::Message(factory)
, fManager(manager) , fManager(manager)
, fQueued(false) , fQueued(false)
, fMeta{hdr} , fMeta{hdr}
@ -221,6 +221,8 @@ class Message final : public fair::mq::Message
bool InitializeChunk(const size_t size) bool InitializeChunk(const size_t size)
{ {
tools::RateLimiter rateLimiter(20);
while (fMeta.fHandle < 0) { while (fMeta.fHandle < 0) {
try { try {
boost::interprocess::managed_shared_memory::size_type actualSize = size; boost::interprocess::managed_shared_memory::size_type actualSize = size;
@ -228,7 +230,10 @@ class Message final : public fair::mq::Message
fLocalPtr = fManager.Segment().allocation_command<char>(boost::interprocess::allocate_new, size, actualSize, hint); fLocalPtr = fManager.Segment().allocation_command<char>(boost::interprocess::allocate_new, size, actualSize, hint);
} catch (boost::interprocess::bad_alloc& ba) { } catch (boost::interprocess::bad_alloc& ba) {
// LOG(warn) << "Shared memory full..."; // LOG(warn) << "Shared memory full...";
std::this_thread::sleep_for(std::chrono::milliseconds(50)); if (fManager.ThrowingOnBadAlloc()) {
throw MessageBadAlloc(tools::ToString("shmem: could not create a message of size ", size));
}
rateLimiter.maybe_sleep();
if (fManager.Interrupted()) { if (fManager.Interrupted()) {
return false; return false;
} else { } else {

View File

@ -59,11 +59,13 @@ class TransportFactory final : public fair::mq::TransportFactory
std::string sessionName = "default"; std::string sessionName = "default";
size_t segmentSize = 2000000000; size_t segmentSize = 2000000000;
bool autolaunchMonitor = false; bool autolaunchMonitor = false;
bool throwOnBadAlloc = true;
if (config) { if (config) {
numIoThreads = config->GetProperty<int>("io-threads", numIoThreads); numIoThreads = config->GetProperty<int>("io-threads", numIoThreads);
sessionName = config->GetProperty<std::string>("session", sessionName); sessionName = config->GetProperty<std::string>("session", sessionName);
segmentSize = config->GetProperty<size_t>("shm-segment-size", segmentSize); segmentSize = config->GetProperty<size_t>("shm-segment-size", segmentSize);
autolaunchMonitor = config->GetProperty<bool>("shm-monitor", autolaunchMonitor); autolaunchMonitor = config->GetProperty<bool>("shm-monitor", autolaunchMonitor);
throwOnBadAlloc = config->GetProperty<bool>("shm-throw-bad-alloc", throwOnBadAlloc);
} else { } else {
LOG(debug) << "ProgOptions not available! Using defaults."; LOG(debug) << "ProgOptions not available! Using defaults.";
} }
@ -84,8 +86,7 @@ class TransportFactory final : public fair::mq::TransportFactory
Manager::StartMonitor(fShmId); Manager::StartMonitor(fShmId);
} }
fManager = tools::make_unique<Manager>(fShmId, fDeviceId, segmentSize); fManager = tools::make_unique<Manager>(fShmId, fDeviceId, segmentSize, throwOnBadAlloc);
} catch (boost::interprocess::interprocess_exception& e) { } catch (boost::interprocess::interprocess_exception& e) {
LOG(error) << "Could not initialize shared memory transport: " << e.what(); LOG(error) << "Could not initialize shared memory transport: " << e.what();
throw std::runtime_error(tools::ToString("Could not initialize shared memory transport: ", e.what())); throw std::runtime_error(tools::ToString("Could not initialize shared memory transport: ", e.what()));