Compare commits

..

7 Commits

Author SHA1 Message Date
Alexey Rybalchenko
d105960444 fix(shm): Fix incorrect parameters when mapping regions 2022-09-06 08:09:47 +02:00
Dennis Klein
3aae5bae58 build: Add ORCID for Christian Tacke 2022-09-06 08:08:42 +02:00
Dennis Klein
9031029d2c build: Add ORCID for Dennis Klein 2022-09-06 08:08:34 +02:00
Dennis Klein
d478e050ba build: HTML-Format desc field in zenodo.org config 2022-09-06 08:08:14 +02:00
Dennis Klein
06b2b9b01f build: Add license hint to zenodo.org config 2022-09-06 08:08:05 +02:00
Dennis Klein
b3fa4f6e7e build: Add config for zenodo.org import 2022-09-06 08:07:46 +02:00
Alexey Rybalchenko
da5cb34416 fix(shm): race/deadlock in region locks 2022-08-21 18:32:24 +02:00
7 changed files with 170 additions and 54 deletions

86
.zenodo.json Normal file
View File

@@ -0,0 +1,86 @@
{
"creators": [
{
"name": "Al-Turany, Mohammad"
},
{
"orcid": "0000-0003-3787-1910",
"name": "Klein, Dennis"
},
{
"name": "Kollegger, Thorsten"
},
{
"name": "Rybalchenko, Alexey"
},
{
"name": "Winckler, Nicolas"
}
],
"contributors": [
{
"type": "Other",
"name": "Aphecetche, Laurent"
},
{
"type": "Other",
"name": "Binet, Sebastien"
},
{
"type": "Other",
"name": "Eulisse, Giulio"
},
{
"type": "Other",
"name": "Karabowicz, Radoslaw"
},
{
"type": "Other",
"name": "Kretz, Matthias"
},
{
"type": "Other",
"name": "Krzewicki, Mikolaj"
},
{
"type": "Other",
"name": "Lebedev, Andrey"
},
{
"type": "Other",
"name": "Mrnjavac, Teo"
},
{
"type": "Other",
"name": "Neskovic, Gvozden"
},
{
"type": "Other",
"name": "Richter, Matthias"
},
{
"type": "Other",
"orcid": "0000-0002-5321-8404",
"name": "Tacke, Christian"
},
{
"type": "Other",
"name": "Uhlig, Florian"
},
{
"type": "Other",
"name": "Wenzel, Sandro"
}
],
"description": "<p>C++ Message Queuing Library and Framework</p>",
"related_identifiers": [
{
"identifier": "https://github.com/FairRootGroup/FairMQ/",
"relation": "isSupplementTo",
"resource_type": "software",
"scheme": "url"
}
],
"title": "FairMQ",
"license": "LGPL-3.0-only"
}

View File

@@ -1,5 +1,5 @@
Al-Turany, Mohammad
Klein, Dennis
Klein, Dennis [https://orcid.org/0000-0003-3787-1910]
Kollegger, Thorsten
Rybalchenko, Alexey
Winckler, Nicolas

View File

@@ -3,11 +3,11 @@ Binet, Sebastien
Eulisse, Giulio
Karabowicz, Radoslaw
Kretz, Matthias <kretz@kde.org>
Krzewicki, Mikolaj
Krzewicki, Mikolaj
Lebedev, Andrey
Mrnjavac, Teo <teo.m@cern.ch>
Neskovic, Gvozden
Richter, Matthias
Tacke, Christian
Tacke, Christian [https://orcid.org/0000-0002-5321-8404]
Uhlig, Florian
Wenzel, Sandro

View File

@@ -18,7 +18,8 @@
{
"@type": "Person",
"givenName": "Dennis",
"familyName": "Klein"
"familyName": "Klein",
"@id": "https://orcid.org/0000-0003-3787-1910"
},
{
"@type": "Person",
@@ -92,7 +93,8 @@
{
"@type": "Person",
"givenName": "Christian",
"familyName": "Tacke"
"familyName": "Tacke",
"@id": "https://orcid.org/0000-0002-5321-8404"
},
{
"@type": "Person",

View File

@@ -76,6 +76,8 @@ struct Sampler : fair::mq::Device
void ResetTask() override
{
// give some time for acks to be received
std::this_thread::sleep_for(std::chrono::milliseconds(250));
fRegion.reset();
{
std::lock_guard<std::mutex> lock(fMtx);

View File

@@ -397,12 +397,10 @@ class Manager
UnmanagedRegion* region = nullptr;
bool newRegionCreated = false;
{
std::lock_guard<std::mutex> lock(fLocalRegionsMtx);
auto res = fRegions.emplace(id, std::make_unique<UnmanagedRegion>(fShmId, size, false, cfg));
newRegionCreated = res.second;
region = res.first->second.get();
}
std::lock_guard<std::mutex> lock(fLocalRegionsMtx);
auto res = fRegions.emplace(id, std::make_unique<UnmanagedRegion>(fShmId, size, false, cfg));
newRegionCreated = res.second;
region = res.first->second.get();
// LOG(debug) << "Created region with id '" << id << "', path: '" << cfg.path << "', flags: '" << cfg.creationFlags << "'";
if (!newRegionCreated) {
@@ -429,7 +427,7 @@ class Manager
}
}
UnmanagedRegion* GetRegion(uint16_t id)
UnmanagedRegion* GetRegionFromCache(uint16_t id)
{
// NOTE: gcc optimizations. Prevent loading tls addresses many times in the fast path
const auto &lTlCache = fTlRegionCache;
@@ -443,41 +441,40 @@ class Manager
}
}
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> shmLock(*fShmMtx);
// slow path: check invalidation
if (lTlCacheGen != fRegionsGen) {
fTlRegionCache.fRegionsTLCache.clear();
}
std::lock_guard<std::mutex> lock(fLocalRegionsMtx);
auto* lRegion = GetRegionUnsafe(id, shmLock);
auto* lRegion = GetRegion(id);
fTlRegionCache.fRegionsTLCache.emplace_back(std::make_tuple(lRegion, id, fShmId64));
fTlRegionCache.fRegionsTLCacheGen = fRegionsGen;
return lRegion;
}
UnmanagedRegion* GetRegionUnsafe(uint16_t id, boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex>& lockedShmLock)
UnmanagedRegion* GetRegion(uint16_t id)
{
std::lock_guard<std::mutex> lock(fLocalRegionsMtx);
// remote region could actually be a local one if a message originates from this device (has been sent out and returned)
auto it = fRegions.find(id);
if (it != fRegions.end()) {
return it->second.get();
} else {
try {
// get region info
RegionInfo regionInfo = fShmRegions->at(id);
// safe to unlock now - no shm container accessed after this
lockedShmLock.unlock();
RegionConfig cfg;
cfg.id = id;
cfg.creationFlags = regionInfo.fCreationFlags;
cfg.path = regionInfo.fPath.c_str();
// get region info
{
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> shmLock(*fShmMtx);
RegionInfo regionInfo = fShmRegions->at(id);
cfg.id = id;
cfg.creationFlags = regionInfo.fCreationFlags;
cfg.path = regionInfo.fPath.c_str();
}
// LOG(debug) << "Located remote region with id '" << id << "', path: '" << cfg.path << "', flags: '" << cfg.creationFlags << "'";
auto r = fRegions.emplace(id, std::make_unique<UnmanagedRegion>(fShmId, 0, true, std::move(cfg)));
r.first->second->InitializeQueues();
r.first->second->StartAckSender();
lockedShmLock.lock();
return r.first->second.get();
} catch (std::out_of_range& oor) {
LOG(error) << "Could not get remote region with id '" << id << "'. Does the region creator run with the same session id?";
@@ -493,10 +490,10 @@ class Manager
void RemoveRegion(uint16_t id)
{
try {
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> shmLock(*fShmMtx);
std::lock_guard<std::mutex> lock(fLocalRegionsMtx);
fRegions.at(id)->StopAcks();
{
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> shmLock(*fShmMtx);
if (fRegions.at(id)->RemoveOnDestruction()) {
fShmRegions->at(id).fDestroyed = true;
(fEventCounter->fCount)++;
@@ -512,44 +509,73 @@ class Manager
std::vector<fair::mq::RegionInfo> GetRegionInfo()
{
std::vector<fair::mq::RegionInfo> result;
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> shmLock(*fShmMtx);
std::map<uint64_t, RegionConfig> regionCfgs;
for (const auto& e : *fShmSegments) {
// make sure any segments in the session are found
GetSegment(e.first);
try {
{
boost::interprocess::scoped_lock<boost::interprocess::interprocess_mutex> shmLock(*fShmMtx);
for (const auto& [segmentId, segmentInfo] : *fShmSegments) {
// make sure any segments in the session are found
GetSegment(segmentId);
try {
fair::mq::RegionInfo info;
info.managed = true;
info.id = segmentId;
info.event = RegionEvent::created;
info.ptr = boost::apply_visitor(SegmentAddress(), fSegments.at(segmentId));
info.size = boost::apply_visitor(SegmentSize(), fSegments.at(segmentId));
result.push_back(info);
} catch (const std::out_of_range& oor) {
LOG(error) << "could not find segment with id " << segmentId;
LOG(error) << oor.what();
}
}
for (const auto& [regionId, regionInfo] : *fShmRegions) {
fair::mq::RegionInfo info;
info.managed = true;
info.id = e.first;
info.event = RegionEvent::created;
info.ptr = boost::apply_visitor(SegmentAddress(), fSegments.at(e.first));
info.size = boost::apply_visitor(SegmentSize(), fSegments.at(e.first));
info.managed = false;
info.id = regionId;
info.flags = regionInfo.fUserFlags;
info.event = regionInfo.fDestroyed ? RegionEvent::destroyed : RegionEvent::created;
if (info.event == RegionEvent::created) {
RegionConfig cfg;
cfg.id = info.id;
cfg.creationFlags = regionInfo.fCreationFlags;
cfg.path = regionInfo.fPath.c_str();
regionCfgs.emplace(info.id, cfg);
// fill the ptr+size info after shmLock is released, to avoid constructing local region under it
} else {
info.ptr = nullptr;
info.size = 0;
}
result.push_back(info);
} catch (const std::out_of_range& oor) {
LOG(error) << "could not find segment with id " << e.first;
LOG(error) << oor.what();
}
}
for (const auto& e : *fShmRegions) {
fair::mq::RegionInfo info;
info.managed = false;
info.id = e.first;
info.flags = e.second.fUserFlags;
info.event = e.second.fDestroyed ? RegionEvent::destroyed : RegionEvent::created;
if (info.event == RegionEvent::created) {
auto region = GetRegionUnsafe(info.id, shmLock);
if (region) {
// do another iteration outside of shm lock, to fill ptr+size of unmanaged regions
for (auto& info : result) {
if (!info.managed && info.event == RegionEvent::created) {
auto cfgIt = regionCfgs.find(info.id);
if (cfgIt != regionCfgs.end()) {
UnmanagedRegion* region = nullptr;
std::lock_guard<std::mutex> lock(fLocalRegionsMtx);
auto it = fRegions.find(info.id);
if (it != fRegions.end()) {
region = it->second.get();
} else {
auto r = fRegions.emplace(cfgIt->first, std::make_unique<UnmanagedRegion>(fShmId, 0, true, cfgIt->second));
region = r.first->second.get();
region->InitializeQueues();
region->StartAckSender();
}
info.ptr = region->GetData();
info.size = region->GetSize();
} else {
throw std::runtime_error(tools::ToString("GetRegionInfo() could not get region with id '", info.id, "'"));
info.ptr = nullptr;
info.size = 0;
}
} else {
info.ptr = nullptr;
info.size = 0;
}
result.push_back(info);
}
return result;

View File

@@ -195,7 +195,7 @@ class Message final : public fair::mq::Message
fLocalPtr = nullptr;
}
} else {
fRegionPtr = fManager.GetRegion(fMeta.fRegionId);
fRegionPtr = fManager.GetRegionFromCache(fMeta.fRegionId);
if (fRegionPtr) {
fLocalPtr = reinterpret_cast<char*>(fRegionPtr->GetData()) + fMeta.fHandle;
} else {
@@ -365,7 +365,7 @@ class Message final : public fair::mq::Message
void ReleaseUnmanagedRegionBlock()
{
if (!fRegionPtr) {
fRegionPtr = fManager.GetRegion(fMeta.fRegionId);
fRegionPtr = fManager.GetRegionFromCache(fMeta.fRegionId);
}
if (fRegionPtr) {