Skip to content

Commit

Permalink
Implement scraper global statistics cache optimization
Browse files Browse the repository at this point in the history
This commit changes the ConvergedManifestPartsMap to a
map of pointers, ConvergedManifestPartPtrsMap.

All of the pointer ownership problems may not be solved yet by
this commit, but most of them are. The CScraperManifest held by
the ConvergedManifest is via a shared_ptr. In turn the CParts
in the CScraperManifest::CSplitBlob vParts are also mapped into
the ConvergedManifestPartPtrsMap, which is indexed by project.
The part pointers in the ConvergedManifestPartPtrsMap are valid
for the same lifetime as the CScraperManifest held by shared pointer
in the ConvergedManifest, so I think this is safe.

This minimizes the changes to the rest of the scraper to achieve
this optimization. I am less certain of the pointer safety in the
changes I made in the quorum and superblock classes.
  • Loading branch information
jamescowens committed Aug 18, 2020
1 parent c0a6573 commit 330f9e7
Show file tree
Hide file tree
Showing 7 changed files with 451 additions and 117 deletions.
18 changes: 9 additions & 9 deletions src/neuralnet/quorum.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -788,11 +788,11 @@ class SuperblockValidator
//! \param project_name Identifies the project to add.
//! \param project_part_data Serialized project stats of the part.
//!
void AddPart(std::string project_name, CSerializeData project_part_data)
void AddPart(std::string project_name, CSplitBlob::CPart* project_part_ptr)
{
m_convergence.ConvergedManifestPartsMap.emplace(
m_convergence.ConvergedManifestPartPtrsMap.emplace(
std::move(project_name),
std::move(project_part_data));
std::move(project_part_ptr));
}

//!
Expand Down Expand Up @@ -944,7 +944,7 @@ class SuperblockValidator

convergence.AddPart(
project_pair.first, // project name
GetResolvedPartData(resolved_part.m_part_hash));
GetResolvedPartPtr(resolved_part.m_part_hash));

remainder -= part_index * project.m_combiner_mask;

Expand Down Expand Up @@ -981,7 +981,7 @@ class SuperblockValidator
//!
//! \return Serialized binary data of the part to add to a convergence.
//!
static CSerializeData GetResolvedPartData(const uint256& part_hash)
static CSplitBlob::CPart* GetResolvedPartPtr(const uint256& part_hash)
{
LOCK(CSplitBlob::cs_mapParts);

Expand All @@ -991,10 +991,10 @@ class SuperblockValidator
// the most recent project part should always exist:
if (iter == CSplitBlob::mapParts.end()) {
LogPrintf("ValidateSuperblock(): project part disappeared.");
return CSerializeData();
return nullptr;
}

return iter->second.data;
return &(iter->second);
}

//!
Expand Down Expand Up @@ -1029,7 +1029,7 @@ class SuperblockValidator
return;
}

convergence.AddPart("BeaconList", manifest.vParts[0]->data);
convergence.AddPart("BeaconList", manifest.vParts[0]);

// Find the offset of the verified beacons project part. Typically
// this exists at vParts offset 1 when a scraper verified at least
Expand All @@ -1054,7 +1054,7 @@ class SuperblockValidator
return;
}

convergence.AddPart("VerifiedBeacons", manifest.vParts[part_offset]->data);
convergence.AddPart("VerifiedBeacons", manifest.vParts[part_offset]);
}
}; // ProjectCombiner

Expand Down
1 change: 1 addition & 0 deletions src/neuralnet/quorum.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once

#include <string>
#include "scraper_net.h"

class CBlockIndex;

Expand Down
4 changes: 2 additions & 2 deletions src/neuralnet/superblock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,9 +557,9 @@ Superblock Superblock::FromConvergence(
// Add hints created from the hashes of converged manifest parts to each
// superblock project section to assist receiving nodes with validation:
//
for (const auto& part_pair : stats.Convergence.ConvergedManifestPartsMap) {
for (const auto& part_pair : stats.Convergence.ConvergedManifestPartPtrsMap) {
const std::string& project_name = part_pair.first;
const CSerializeData& part_data = part_pair.second;
const CSerializeData& part_data = part_pair.second->data;

projects.SetHint(project_name, part_data);
}
Expand Down
25 changes: 25 additions & 0 deletions src/neuralnet/superblock.h
Original file line number Diff line number Diff line change
Expand Up @@ -1533,6 +1533,20 @@ struct hash<NN::QuorumHash>
// This is part of the scraper but is put here, because it needs the complete NN::Superblock class.
struct ConvergedScraperStats
{
// Builds an empty cache entry: the convergence and the superblock are default-constructed,
// the clean flag is cleared, the timestamp is zeroed, and the converged stats and past
// convergences are reset to their empty values.
ConvergedScraperStats()
    : Convergence()
    , NewFormatSuperblock()
{
    nTime = 0;
    bClean = false;

    PastConvergences = {};
    mScraperConvergedStats = {};
}

ConvergedScraperStats(const int64_t nTime_in, const ConvergedManifest& Convergence) : Convergence(Convergence)
{
nTime = nTime_in;
}

// Flag to indicate cache is clean or dirty (i.e. state change of underlying statistics has occurred).
// This flag is marked true in ScraperGetSuperblockContract() and false on receipt or deletion of
// statistics objects.
Expand All @@ -1558,7 +1572,18 @@ struct ConvergedScraperStats
{
// This is specifically this form of insert to ensure that if there is a hint "collision" the referenced
// SB Hash and Convergence stored will be the LATER one.

PastConvergences[nReducedContentHash] = std::make_pair(NewFormatSuperblock.GetHash(), Convergence);

/*
if (PastConvergences.find(nReducedContentHash) != PastConvergences.end())
{
PastConvergences.erase(nReducedContentHash);
}
PastConvergences.emplace(std::make_pair(nReducedContentHash, Convergence));
*/

}
}

Expand Down
234 changes: 233 additions & 1 deletion src/scraper/fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
#include "util.h"
#include "streams.h"

#include "scraper_net.h"

/*********************
* Scraper ENUMS *
*********************/
Expand Down Expand Up @@ -60,8 +62,178 @@ typedef std::map<std::string, CSerializeData> mConvergedManifestParts;
// Note that this IS a copy not a pointer. Since manifests and parts can be deleted because of aging rules,
// it is dangerous to save memory and point to the actual part objects themselves.

typedef std::map<std::string, CSplitBlob::CPart*> mConvergedManifestPart_ptrs;

struct ConvergedManifest
{
ConvergedManifest()
{
nContentHash = {};
ConsensusBlock = {};
timestamp = 0;
bByParts = false;

CScraperConvergedManifest_ptr = nullptr;

//ConvergedManifestPartsMap = {};

ConvergedManifestPartPtrsMap = {};

mIncludedScraperManifests = {};

nUnderlyingManifestContentHash = {};

vIncludedScrapers = {};
vExcludedScrapers = {};
vScrapersNotPublishing = {};

mIncludedScrapersbyProject = {};
mIncludedProjectsbyScraper = {};

mScraperConvergenceCountbyProject = {};

vExcludedProjects = {};
}

ConvergedManifest(const ConvergedManifest& in)
{
// We can use the content hash from the specified converged manifest. We do not need to recompute it.
nContentHash = in.nContentHash;

ConsensusBlock = in.ConsensusBlock;
timestamp = in.timestamp;
bByParts = in.bByParts;

CScraperConvergedManifest_ptr = in.CScraperConvergedManifest_ptr;

PopulateConvergedManifestPartPtrsMap();

// CScraperConvergedManifest_ptr = std::move(in.CScraperConvergedManifest_ptr);

// We are going to make a copy of the manifest here and create a new pointer to it. We
// need to do this because of the const qualifier (for use in const iterators).

//CScraperManifest CScraperConvergedManifest = *(in.CScraperConvergedManifest_ptr);

//std::shared_ptr<CScraperManifest> ptr(&CScraperConvergedManifest);

//CScraperConvergedManifest_ptr = std::move(ptr);

//ConvergedManifestPartsMap = in.ConvergedManifestPartsMap;

mIncludedScraperManifests = in.mIncludedScraperManifests;

nUnderlyingManifestContentHash = in.nUnderlyingManifestContentHash;

vIncludedScrapers = in.vIncludedScrapers;
vExcludedScrapers = in.vExcludedScrapers;
vScrapersNotPublishing = in.vScrapersNotPublishing;

mIncludedScrapersbyProject = in.mIncludedScrapersbyProject;
mIncludedProjectsbyScraper = in.mIncludedProjectsbyScraper;

mScraperConvergenceCountbyProject = in.mScraperConvergenceCountbyProject;

vExcludedProjects = in.vExcludedProjects;
}

// For constructing a dummy converged manifest from a single manifest
// Builds a dummy converged manifest that wraps a single scraper manifest.
//
// A private copy of the manifest is placed under shared ownership, the part pointer map is
// derived from that copy, and the converged content hash is then computed from the mapped
// parts. The timestamp is the adjusted time at construction, and bByParts is false.
ConvergedManifest(CScraperManifest& in)
{
    bByParts = false;
    timestamp = GetAdjustedTime();
    ConsensusBlock = in.ConsensusBlock;

    // Remember which manifest content this convergence was derived from.
    nUnderlyingManifestContentHash = in.nContentHash;

    CScraperConvergedManifest_ptr = std::make_shared<CScraperManifest>(in);

    // The map must be populated before hashing, because the hash is taken over the mapped parts.
    PopulateConvergedManifestPartPtrsMap();
    ComputeConvergedContentHash();
}


// Re-initializes this object as a copy of the supplied converged manifest.
//
// \param in Converged manifest to copy from. The underlying CScraperManifest is shared with
//           it (the shared_ptr is copied, not the manifest).
void operator()(ConvergedManifest& in)
{
    // We can use the content hash from the specified converged manifest. We do not need to recompute it.
    nContentHash = in.nContentHash;

    ConsensusBlock = in.ConsensusBlock;
    timestamp = in.timestamp;
    bByParts = in.bByParts;

    CScraperConvergedManifest_ptr = in.CScraperConvergedManifest_ptr;

    // Replace the part pointers with the ones held by the source. Simply populating on top
    // of the existing map is not enough: insert() does not overwrite existing keys, so
    // pointers belonging to whatever this object held before would survive under any
    // project name that is also present in the new manifest.
    ConvergedManifestPartPtrsMap = in.ConvergedManifestPartPtrsMap;

    // Fill in any entries derivable from the shared manifest that are not already present.
    // This is a no-op when there is no manifest.
    PopulateConvergedManifestPartPtrsMap();

    mIncludedScraperManifests = in.mIncludedScraperManifests;

    nUnderlyingManifestContentHash = in.nUnderlyingManifestContentHash;

    vIncludedScrapers = in.vIncludedScrapers;
    vExcludedScrapers = in.vExcludedScrapers;
    vScrapersNotPublishing = in.vScrapersNotPublishing;

    mIncludedScrapersbyProject = in.mIncludedScrapersbyProject;
    mIncludedProjectsbyScraper = in.mIncludedProjectsbyScraper;

    mScraperConvergenceCountbyProject = in.mScraperConvergenceCountbyProject;

    vExcludedProjects = in.vExcludedProjects;
}

// Re-initializes this object as a dummy converged manifest wrapping a single scraper manifest.
//
// \param in Scraper manifest to wrap. A private copy is made and held by shared pointer.
//
// \return The result of PopulateConvergedManifestPartPtrsMap(): true when the content hash
//         recomputed from the manifest parts matches the manifest's own content hash.
bool operator()(const CScraperManifest& in)
{
    ConsensusBlock = in.ConsensusBlock;
    timestamp = GetAdjustedTime();
    bByParts = false;

    CScraperConvergedManifest_ptr = std::make_shared<CScraperManifest>(in);

    // Drop part pointers left over from whatever this object held before. The population
    // below uses insert(), which does not overwrite existing keys, so without this any
    // leftover entry would shadow the new manifest's part of the same name and would also
    // be folded into the converged content hash computed below.
    ConvergedManifestPartPtrsMap.clear();

    bool bConvergedContentHashMatches = PopulateConvergedManifestPartPtrsMap();

    ComputeConvergedContentHash();

    nUnderlyingManifestContentHash = in.nContentHash;

    return bConvergedContentHashMatches;
}

// Returns this object to the empty state: hashes and consensus block value-reset, timestamp
// zero, bByParts false, no underlying manifest, and every container emptied.
void Reset()
{
    nContentHash = {};
    ConsensusBlock = {};
    timestamp = 0;
    bByParts = false;

    // Release our share of the underlying manifest, then forget the part pointers.
    CScraperConvergedManifest_ptr.reset();
    ConvergedManifestPartPtrsMap.clear();

    // Manifest-level convergence bookkeeping.
    mIncludedScraperManifests.clear();
    nUnderlyingManifestContentHash = {};

    // Scraper lists.
    vIncludedScrapers = {};
    vExcludedScrapers = {};
    vScrapersNotPublishing = {};

    // Project-level convergence bookkeeping.
    mIncludedScrapersbyProject = {};
    mIncludedProjectsbyScraper = {};
    mScraperConvergenceCountbyProject = {};
    vExcludedProjects = {};
}


// IMPORTANT... nContentHash is NOT the hash of part hashes in the order of vParts unlike CScraper::manifest.
// It is the hash of the data of the parts referenced by ConvergedManifestPartPtrsMap in the order of the key.
// It represents the composite convergence by taking parts piecewise in the case of the fallback to bByParts
// (project) level.
Expand All @@ -70,7 +242,11 @@ struct ConvergedManifest
int64_t timestamp;
bool bByParts;

mConvergedManifestParts ConvergedManifestPartsMap;
std::shared_ptr<CScraperManifest> CScraperConvergedManifest_ptr;

// mConvergedManifestParts ConvergedManifestPartsMap;

mConvergedManifestPart_ptrs ConvergedManifestPartPtrsMap;

// Used when convergence is at the manifest level (normal)
std::map<ScraperID, uint256> mIncludedScraperManifests;
Expand All @@ -97,6 +273,62 @@ struct ConvergedManifest

// --------- project
std::vector<std::string> vExcludedProjects;

bool PopulateConvergedManifestPartPtrsMap()
{
if (CScraperConvergedManifest_ptr == nullptr) return false;

int iPartNum = 0;
CDataStream ss(SER_NETWORK,1);
WriteCompactSize(ss, CScraperConvergedManifest_ptr->vParts.size());
uint256 nContentHashCheck;

for (const auto& iter : CScraperConvergedManifest_ptr->vParts)
{
std::string sProject;

if (iPartNum == 0)
sProject = "BeaconList";
else
sProject = CScraperConvergedManifest_ptr->projects[iPartNum-1].project;

// Copy the pointer to the CPart into the map. This is ok, because the parts will be held
// until the CScraperManifest in this object is destroyed and all of the manifest refs to the part
// are gone.
ConvergedManifestPartPtrsMap.insert(std::make_pair(sProject, iter));

// Serialize the hash to doublecheck the content hash.
ss << iter->hash;

iPartNum++;
}

ss << CScraperConvergedManifest_ptr->ConsensusBlock;

nContentHashCheck = Hash(ss.begin(), ss.end());

if (nContentHashCheck != CScraperConvergedManifest_ptr->nContentHash)
{
LogPrintf("ERROR: PopulateConvergedManifestPartPtrsMap(): Selected Manifest content hash check failed! "
"nContentHashCheck = %s and nContentHash = %s.",
nContentHashCheck.GetHex(), CScraperConvergedManifest_ptr->nContentHash.GetHex());
return false;
}

return true;
}

void ComputeConvergedContentHash()
{
CDataStream ss(SER_NETWORK,1);

for (const auto& iter : ConvergedManifestPartPtrsMap)
{
ss << iter.second->data;
}

nContentHash = Hash(ss.begin(), ss.end());
}
};


Expand Down
Loading

0 comments on commit 330f9e7

Please sign in to comment.