vis: huge speedup from fixing false sharing with stat counters

gmsp3v2.bsp, 32 threads 3950x, 157s -> 61s
This commit is contained in:
Eric Wasylishen 2023-11-07 23:55:21 -07:00
parent 7e5ad9a39b
commit c9edf4ddaa
3 changed files with 93 additions and 62 deletions

View File

@ -196,15 +196,45 @@ struct pstack_t
// important for perf as a ton of these are stack allocated, needs to be just a pointer bump
static_assert(std::is_trivially_default_constructible_v<pstack_t>);
// Bundle of event counters gathered while computing vis.
//
// Every counter starts at zero. The type is a plain value type, so independent
// instances can be filled in separately and merged afterwards with operator+.
struct visstats_t
{
    int64_t c_portaltest = 0;
    int64_t c_portalpass = 0;
    int64_t c_portalcheck = 0;
    int64_t c_mightseeupdate = 0;
    int64_t c_noclip = 0;
    int64_t c_vistest = 0;
    int64_t c_mighttest = 0;
    int64_t c_chains = 0;
    int64_t c_leafskip = 0;
    int64_t c_portalskip = 0;

    // Returns a new visstats_t in which each counter is the sum of the
    // matching counters of *this and `other`. Neither operand is modified.
    visstats_t operator+(const visstats_t &other) const
    {
        // Start from a copy of the left operand, then fold the right one in.
        visstats_t sum = *this;
        sum.c_portaltest += other.c_portaltest;
        sum.c_portalpass += other.c_portalpass;
        sum.c_portalcheck += other.c_portalcheck;
        sum.c_mightseeupdate += other.c_mightseeupdate;
        sum.c_noclip += other.c_noclip;
        sum.c_vistest += other.c_vistest;
        sum.c_mighttest += other.c_mighttest;
        sum.c_chains += other.c_chains;
        sum.c_leafskip += other.c_leafskip;
        sum.c_portalskip += other.c_portalskip;
        return sum;
    }
};
viswinding_t *AllocStackWinding(pstack_t &stack);
void FreeStackWinding(viswinding_t *&w, pstack_t &stack);
viswinding_t *ClipStackWinding(viswinding_t *in, pstack_t &stack, const qplane3d &split);
viswinding_t *ClipStackWinding(visstats_t &stats, viswinding_t *in, pstack_t &stack, const qplane3d &split);
// Working state for one PortalFlow() call; PortalFlow() creates it and hands a
// pointer to it down through RecursiveLeafFlow().
struct threaddata_t
{
// Bound to the source portal's visbits when PortalFlow() constructs this object.
leafbits_t &leafvis;
// NOTE(review): presumably the portal the flow starts from; where it is assigned is not visible here -- confirm.
visportal_t *base;
// First pstack_t of the chain; PortalFlow() passes it as the initial prevstack.
pstack_t pstack_head;
// Counters updated during the flow (e.g. c_chains); PortalFlow() returns a copy of this.
visstats_t stats;
};
extern int numportals;
@ -214,13 +244,6 @@ extern int portalleafs_real;
extern std::vector<visportal_t> portals; // always numportals * 2; front and back
extern std::vector<leaf_t> leafs;
extern int c_noclip;
extern int c_portaltest, c_portalpass, c_portalcheck;
extern int c_vistest, c_mighttest;
extern unsigned long c_chains;
extern bool showgetleaf;
extern std::vector<uint8_t> uncompressed;
extern int leafbytes;
extern int leafbytes_real;
@ -230,7 +253,7 @@ extern fs::path portalfile, statefile, statetmpfile;
void BasePortalVis(void);
void PortalFlow(visportal_t *p);
visstats_t PortalFlow(visportal_t *p);
void CalcAmbientSounds(mbsp_t *bsp);

View File

@ -4,12 +4,6 @@
#include <common/parallel.hh>
#include <atomic>
unsigned long c_chains;
int c_vistest, c_mighttest;
static int c_portalskip;
static int c_leafskip;
/*
==============
ClipToSeparators
@ -30,7 +24,7 @@ static int c_leafskip;
pointer, was measurably faster
==============
*/
static void ClipToSeparators(const viswinding_t *source, const qplane3d src_pl, const viswinding_t *pass,
static void ClipToSeparators(visstats_t &stats, const viswinding_t *source, const qplane3d src_pl, const viswinding_t *pass,
viswinding_t *&target, unsigned int test, pstack_t &stack)
{
int i, j, k, l;
@ -114,7 +108,7 @@ static void ClipToSeparators(const viswinding_t *source, const qplane3d src_pl,
stack.numseparators[test]++;
}
target = ClipStackWinding(target, stack, sep);
target = ClipStackWinding(stats, target, stack, sep);
if (!target)
return; // target is not visible
@ -150,7 +144,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
leaf_t *leaf;
int i, j, err, numblocks;
++c_chains;
++thread->stats.c_chains;
leaf = &leafs[leafnum];
@ -193,7 +187,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
p = leaf->portals[i];
if (!(*prevstack.mightsee)[p->leaf]) {
c_leafskip++;
thread->stats.c_leafskip++;
continue; // can't possibly see it
}
@ -201,10 +195,10 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
// if the portal can't see anything we haven't already seen, skip it
if (p->status == pstat_done) {
c_vistest++;
thread->stats.c_vistest++;
test = p->visbits.data();
} else {
c_mighttest++;
thread->stats.c_mighttest++;
test = p->mightsee.data();
}
@ -217,7 +211,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
if (!more) {
// can't see anything new
c_portalskip++;
thread->stats.c_portalskip++;
continue;
}
// get plane of portal, point normal into the neighbor leaf
@ -227,7 +221,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
if (qv::epsilonEqual(prevstack.portalplane.normal, backplane.normal, VIS_EQUAL_EPSILON))
continue; // can't go out a coplanar face
c_portalcheck++;
thread->stats.c_portalcheck++;
stack.portal = p;
stack.next = NULL;
@ -244,7 +238,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
*/
/* Clip any part of the target portal behind the source portal */
stack.pass = ClipStackWinding(p->winding.get(), stack, thread->pstack_head.portalplane);
stack.pass = ClipStackWinding(thread->stats, p->winding.get(), stack, thread->pstack_head.portalplane);
if (!stack.pass)
continue;
@ -257,31 +251,31 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
}
/* Clip any part of the target portal behind the pass portal */
stack.pass = ClipStackWinding(stack.pass, stack, prevstack.portalplane);
stack.pass = ClipStackWinding(thread->stats, stack.pass, stack, prevstack.portalplane);
if (!stack.pass)
continue;
/* Clip any part of the source portal in front of the target portal */
stack.source = ClipStackWinding(prevstack.source, stack, backplane);
stack.source = ClipStackWinding(thread->stats, prevstack.source, stack, backplane);
if (!stack.source) {
FreeStackWinding(stack.pass, stack);
continue;
}
c_portaltest++;
thread->stats.c_portaltest++;
/* TEST 0 :: source -> pass -> target */
if (vis_options.level.value() > 0) {
if (stack.numseparators[0]) {
for (j = 0; j < stack.numseparators[0]; j++) {
stack.pass = ClipStackWinding(stack.pass, stack, stack.separators[0][j]);
stack.pass = ClipStackWinding(thread->stats, stack.pass, stack, stack.separators[0][j]);
if (!stack.pass)
break;
}
} else {
/* Using prevstack source for separator cache correctness */
ClipToSeparators(
prevstack.source, thread->pstack_head.portalplane, prevstack.pass, stack.pass, 0, stack);
thread->stats, prevstack.source, thread->pstack_head.portalplane, prevstack.pass, stack.pass, 0, stack);
}
if (!stack.pass) {
FreeStackWinding(stack.source, stack);
@ -293,13 +287,13 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
if (vis_options.level.value() > 1) {
if (stack.numseparators[1]) {
for (j = 0; j < stack.numseparators[1]; j++) {
stack.pass = ClipStackWinding(stack.pass, stack, stack.separators[1][j]);
stack.pass = ClipStackWinding(thread->stats, stack.pass, stack, stack.separators[1][j]);
if (!stack.pass)
break;
}
} else {
/* Using prevstack source for separator cache correctness */
ClipToSeparators(prevstack.pass, prevstack.portalplane, prevstack.source, stack.pass, 1, stack);
ClipToSeparators(thread->stats, prevstack.pass, prevstack.portalplane, prevstack.source, stack.pass, 1, stack);
}
if (!stack.pass) {
FreeStackWinding(stack.source, stack);
@ -309,7 +303,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
/* TEST 2 :: target -> pass -> source */
if (vis_options.level.value() > 2) {
ClipToSeparators(stack.pass, stack.portalplane, prevstack.pass, stack.source, 2, stack);
ClipToSeparators(thread->stats, stack.pass, stack.portalplane, prevstack.pass, stack.source, 2, stack);
if (!stack.source) {
FreeStackWinding(stack.pass, stack);
continue;
@ -318,14 +312,14 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
/* TEST 3 :: pass -> target -> source */
if (vis_options.level.value() > 3) {
ClipToSeparators(prevstack.pass, prevstack.portalplane, stack.pass, stack.source, 3, stack);
ClipToSeparators(thread->stats, prevstack.pass, prevstack.portalplane, stack.pass, stack.source, 3, stack);
if (!stack.source) {
FreeStackWinding(stack.pass, stack);
continue;
}
}
c_portalpass++;
thread->stats.c_portalpass++;
// flow through it for real
RecursiveLeafFlow(p->leaf, thread, stack);
@ -340,7 +334,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
PortalFlow
===============
*/
void PortalFlow(visportal_t *p)
visstats_t PortalFlow(visportal_t *p)
{
threaddata_t data{p->visbits};
@ -357,6 +351,8 @@ void PortalFlow(visportal_t *p)
data.pstack_head.mightsee = &p->mightsee;
RecursiveLeafFlow(p->leaf, &data, data.pstack_head);
return data.stats;
}
/*

View File

@ -3,6 +3,7 @@
#include <climits>
#include <cstdint>
#include <bit> // for std::countr_zero
#include <numeric> // for std::accumulate
#include <vis/leafbits.hh>
#include <vis/vis.hh>
@ -24,11 +25,6 @@ int portalleafs_real; /* real no. of leafs after expanding PRT2 clusters. Not us
std::vector<visportal_t> portals; // always numportals * 2; front and back
std::vector<leaf_t> leafs;
int c_portaltest, c_portalpass, c_portalcheck, c_mightseeupdate;
int c_noclip = 0;
bool showgetleaf = true;
static std::vector<uint8_t> vismap;
uint32_t originalvismapsize;
@ -116,7 +112,7 @@ void FreeStackWinding(viswinding_t *&w, pstack_t &stack)
is returned.
==================
*/
viswinding_t *ClipStackWinding(viswinding_t *in, pstack_t &stack, const qplane3d &split)
viswinding_t *ClipStackWinding(visstats_t &stats, viswinding_t *in, pstack_t &stack, const qplane3d &split)
{
vec_t *dists = (vec_t *)alloca(sizeof(vec_t) * (in->size() + 1));
int *sides = (int *)alloca(sizeof(int) * (in->size() + 1));
@ -216,7 +212,7 @@ viswinding_t *ClipStackWinding(viswinding_t *in, pstack_t &stack, const qplane3d
noclip:
FreeStackWinding(neww, stack);
c_noclip++;
stats.c_noclip++;
return in;
}
@ -271,7 +267,7 @@ visportal_t *GetNextPortal(void)
Called with the lock held.
=============
*/
static void UpdateMightsee(const leaf_t &source, const leaf_t &dest)
static void UpdateMightsee(visstats_t &stats, const leaf_t &source, const leaf_t &dest)
{
size_t leafnum = &dest - leafs.data();
for (size_t i = 0; i < source.numportals; i++) {
@ -282,7 +278,7 @@ static void UpdateMightsee(const leaf_t &source, const leaf_t &dest)
if (p->mightsee[leafnum]) {
p->mightsee[leafnum] = false;
p->nummightsee--;
c_mightseeupdate++;
stats.c_mightseeupdate++;
}
}
}
@ -297,7 +293,7 @@ static void UpdateMightsee(const leaf_t &source, const leaf_t &dest)
Called with the lock held.
=============
*/
static void PortalCompleted(visportal_t *completed)
static void PortalCompleted(visstats_t &stats, visportal_t *completed)
{
int i, j, k, bit, numblocks;
int leafnum;
@ -349,7 +345,7 @@ static void PortalCompleted(visportal_t *completed)
bit = std::countr_zero(changed);
changed &= ~nth_bit(bit);
leafnum = (j << leafbits_t::shift) + bit;
UpdateMightsee(leafs[leafnum], myleaf);
UpdateMightsee(stats, leafs[leafnum], myleaf);
}
}
}
@ -365,7 +361,7 @@ static duration stateinterval;
LeafThread
==============
*/
void LeafThread(size_t)
static visstats_t LeafThread()
{
visportal_t *p;
@ -380,14 +376,16 @@ void LeafThread(size_t)
p = GetNextPortal();
if (!p)
return;
return {};
PortalFlow(p);
visstats_t stats = PortalFlow(p);
PortalCompleted(p);
PortalCompleted(stats, p);
logging::print(logging::flag::VERBOSE, "portal:{:4} mightsee:{:4} cansee:{:4}\n", (ptrdiff_t)(p - portals.data()),
p->nummightsee, p->numcansee);
return stats;
}
/*
@ -503,7 +501,7 @@ static void ClusterFlow(int clusternum, leafbits_t &buffer, mbsp_t *bsp)
CalcPortalVis
==================
*/
void CalcPortalVis(const mbsp_t *bsp)
visstats_t CalcPortalVis(const mbsp_t *bsp)
{
// fastvis just uses mightsee for a very loose bound
if (vis_options.fast.value()) {
@ -511,7 +509,7 @@ void CalcPortalVis(const mbsp_t *bsp)
p.visbits = p.mightsee;
p.status = pstat_done;
}
return;
return {};
}
/*
@ -525,14 +523,26 @@ void CalcPortalVis(const mbsp_t *bsp)
}
portalIndex = startcount;
logging::parallel_for(startcount, numportals * 2, LeafThread);
std::vector<visstats_t> stats_perportal;
stats_perportal.resize(numportals * 2);
logging::parallel_for(startcount, numportals * 2, [&](size_t i) {
stats_perportal[i] = LeafThread();
});
const visstats_t stats = std::accumulate(stats_perportal.begin(),
stats_perportal.end(),
visstats_t{});
SaveVisState();
logging::print(logging::flag::VERBOSE, "portalcheck: {} portaltest: {} portalpass: {}\n", c_portalcheck,
c_portaltest, c_portalpass);
logging::print(logging::flag::VERBOSE, "c_vistest: {} c_mighttest: {} c_mightseeupdate {}\n", c_vistest,
c_mighttest, c_mightseeupdate);
logging::print(logging::flag::VERBOSE, "portalcheck: {} portaltest: {} portalpass: {}\n", stats.c_portalcheck,
stats.c_portaltest, stats.c_portalpass);
logging::print(logging::flag::VERBOSE, "c_vistest: {} c_mighttest: {} c_mightseeupdate {}\n", stats.c_vistest,
stats.c_mighttest, stats.c_mightseeupdate);
return stats;
}
/*
@ -540,7 +550,7 @@ void CalcPortalVis(const mbsp_t *bsp)
CalcVis
==================
*/
void CalcVis(mbsp_t *bsp)
visstats_t CalcVis(mbsp_t *bsp)
{
int i;
@ -552,7 +562,7 @@ void CalcVis(mbsp_t *bsp)
}
logging::print("Calculating Full Vis:\n");
CalcPortalVis(bsp);
auto stats = CalcPortalVis(bsp);
//
// assemble the leaf vis lists by oring and compressing the portal lists
@ -575,6 +585,8 @@ void CalcVis(mbsp_t *bsp)
logging::print("average leafs visible: {}\n", avg);
}
return stats;
}
// ===========================================================================
@ -747,10 +759,10 @@ int vis_main(int argc, const char **argv)
uncompressed.resize(portalleafs * leafbytes);
}
CalcVis(&bsp);
auto stats = CalcVis(&bsp);
logging::print("c_noclip: {}\n", c_noclip);
logging::print("c_chains: {}\n", c_chains);
logging::print("c_noclip: {}\n", stats.c_noclip);
logging::print("c_chains: {}\n", stats.c_chains);
bsp.dvis.bits = std::move(vismap);
bsp.dvis.bits.shrink_to_fit();