vis: huge speedup from fixing false sharing with stat counters
gmsp3v2.bsp, 32 threads 3950x, 157s -> 61s
This commit is contained in:
parent
7e5ad9a39b
commit
c9edf4ddaa
|
|
@ -196,15 +196,45 @@ struct pstack_t
|
|||
// important for perf as a ton of these are stack allocated, needs to be just a pointer bump
|
||||
static_assert(std::is_trivially_default_constructible_v<pstack_t>);
|
||||
|
||||
/**
 * Statistic counters gathered while computing portal visibility.
 *
 * Every counter is a plain (non-atomic) 64-bit integer that starts at zero.
 * An instance is owned by one unit of work (see threaddata_t::stats) and the
 * per-portal results are summed afterwards with operator+ / operator+=,
 * instead of having all worker threads increment shared global counters.
 */
struct visstats_t
{
    /// Bumped in RecursiveLeafFlow once the source winding survived clipping against the target portal.
    int64_t c_portaltest = 0;
    /// Bumped in RecursiveLeafFlow right before recursing through a portal.
    int64_t c_portalpass = 0;
    /// Bumped in RecursiveLeafFlow for each portal that got past the coplanar-face check.
    int64_t c_portalcheck = 0;
    /// Bumped in UpdateMightsee each time a mightsee bit is cleared.
    int64_t c_mightseeupdate = 0;
    /// Bumped in ClipStackWinding when the input winding is returned as-is (the `noclip` path).
    int64_t c_noclip = 0;
    /// Bumped when a portal with status pstat_done is tested against its visbits.
    int64_t c_vistest = 0;
    /// Bumped when a not-yet-done portal is tested against its mightsee bits.
    int64_t c_mighttest = 0;
    /// Bumped on every RecursiveLeafFlow call.
    int64_t c_chains = 0;
    /// Bumped when the previous stack's mightsee excludes the portal's leaf.
    int64_t c_leafskip = 0;
    /// Bumped when a portal cannot add anything that has not been seen yet.
    int64_t c_portalskip = 0;

    /**
     * Adds every counter of `other` to the matching counter of this object.
     * @return reference to this object, so merges can be chained.
     */
    visstats_t &operator+=(const visstats_t &other)
    {
        c_portaltest += other.c_portaltest;
        c_portalpass += other.c_portalpass;
        c_portalcheck += other.c_portalcheck;
        c_mightseeupdate += other.c_mightseeupdate;
        c_noclip += other.c_noclip;
        c_vistest += other.c_vistest;
        c_mighttest += other.c_mighttest;
        c_chains += other.c_chains;
        c_leafskip += other.c_leafskip;
        c_portalskip += other.c_portalskip;
        return *this;
    }

    /**
     * Member-wise sum of two counter sets; neither operand is modified.
     * Implemented through operator+= so the list of counters is only
     * spelled out once. Usable as the default operation of std::accumulate.
     */
    visstats_t operator+(const visstats_t &other) const
    {
        visstats_t result = *this;
        result += other;
        return result;
    }
};
|
||||
|
||||
viswinding_t *AllocStackWinding(pstack_t &stack);
|
||||
void FreeStackWinding(viswinding_t *&w, pstack_t &stack);
|
||||
viswinding_t *ClipStackWinding(viswinding_t *in, pstack_t &stack, const qplane3d &split);
|
||||
viswinding_t *ClipStackWinding(visstats_t &stats, viswinding_t *in, pstack_t &stack, const qplane3d &split);
|
||||
|
||||
// State for a single PortalFlow() call. PortalFlow constructs one of these
// (threaddata_t data{p->visbits}) and passes a pointer to it down every
// RecursiveLeafFlow() level.
// Field order matters: the struct is aggregate-initialized positionally.
struct threaddata_t
|
||||
{
|
||||
// Bound by PortalFlow to the visbits of the portal being flowed.
leafbits_t &leafvis;
|
||||
// NOTE(review): presumably the portal being flowed; its assignment is not
// visible in this diff - confirm against PortalFlow.
visportal_t *base;
|
||||
// First stack frame: PortalFlow passes it as the initial prevstack, and
// RecursiveLeafFlow clips candidate portals against its portalplane.
pstack_t pstack_head;
|
||||
// Counters accumulated during the flow (e.g. ++thread->stats.c_chains);
// PortalFlow returns a copy of this to its caller.
visstats_t stats;
|
||||
};
|
||||
|
||||
extern int numportals;
|
||||
|
|
@ -214,13 +244,6 @@ extern int portalleafs_real;
|
|||
extern std::vector<visportal_t> portals; // always numportals * 2; front and back
|
||||
extern std::vector<leaf_t> leafs;
|
||||
|
||||
extern int c_noclip;
|
||||
extern int c_portaltest, c_portalpass, c_portalcheck;
|
||||
extern int c_vistest, c_mighttest;
|
||||
extern unsigned long c_chains;
|
||||
|
||||
extern bool showgetleaf;
|
||||
|
||||
extern std::vector<uint8_t> uncompressed;
|
||||
extern int leafbytes;
|
||||
extern int leafbytes_real;
|
||||
|
|
@ -230,7 +253,7 @@ extern fs::path portalfile, statefile, statetmpfile;
|
|||
|
||||
void BasePortalVis(void);
|
||||
|
||||
void PortalFlow(visportal_t *p);
|
||||
visstats_t PortalFlow(visportal_t *p);
|
||||
|
||||
void CalcAmbientSounds(mbsp_t *bsp);
|
||||
|
||||
|
|
|
|||
48
vis/flow.cc
48
vis/flow.cc
|
|
@ -4,12 +4,6 @@
|
|||
#include <common/parallel.hh>
|
||||
#include <atomic>
|
||||
|
||||
unsigned long c_chains;
|
||||
int c_vistest, c_mighttest;
|
||||
|
||||
static int c_portalskip;
|
||||
static int c_leafskip;
|
||||
|
||||
/*
|
||||
==============
|
||||
ClipToSeparators
|
||||
|
|
@ -30,7 +24,7 @@ static int c_leafskip;
|
|||
pointer, was measurably faster
|
||||
==============
|
||||
*/
|
||||
static void ClipToSeparators(const viswinding_t *source, const qplane3d src_pl, const viswinding_t *pass,
|
||||
static void ClipToSeparators(visstats_t &stats, const viswinding_t *source, const qplane3d src_pl, const viswinding_t *pass,
|
||||
viswinding_t *&target, unsigned int test, pstack_t &stack)
|
||||
{
|
||||
int i, j, k, l;
|
||||
|
|
@ -114,7 +108,7 @@ static void ClipToSeparators(const viswinding_t *source, const qplane3d src_pl,
|
|||
stack.numseparators[test]++;
|
||||
}
|
||||
|
||||
target = ClipStackWinding(target, stack, sep);
|
||||
target = ClipStackWinding(stats, target, stack, sep);
|
||||
|
||||
if (!target)
|
||||
return; // target is not visible
|
||||
|
|
@ -150,7 +144,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
|
|||
leaf_t *leaf;
|
||||
int i, j, err, numblocks;
|
||||
|
||||
++c_chains;
|
||||
++thread->stats.c_chains;
|
||||
|
||||
leaf = &leafs[leafnum];
|
||||
|
||||
|
|
@ -193,7 +187,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
|
|||
p = leaf->portals[i];
|
||||
|
||||
if (!(*prevstack.mightsee)[p->leaf]) {
|
||||
c_leafskip++;
|
||||
thread->stats.c_leafskip++;
|
||||
continue; // can't possibly see it
|
||||
}
|
||||
|
||||
|
|
@ -201,10 +195,10 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
|
|||
|
||||
// if the portal can't see anything we haven't already seen, skip it
|
||||
if (p->status == pstat_done) {
|
||||
c_vistest++;
|
||||
thread->stats.c_vistest++;
|
||||
test = p->visbits.data();
|
||||
} else {
|
||||
c_mighttest++;
|
||||
thread->stats.c_mighttest++;
|
||||
test = p->mightsee.data();
|
||||
}
|
||||
|
||||
|
|
@ -217,7 +211,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
|
|||
|
||||
if (!more) {
|
||||
// can't see anything new
|
||||
c_portalskip++;
|
||||
thread->stats.c_portalskip++;
|
||||
continue;
|
||||
}
|
||||
// get plane of portal, point normal into the neighbor leaf
|
||||
|
|
@ -227,7 +221,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
|
|||
if (qv::epsilonEqual(prevstack.portalplane.normal, backplane.normal, VIS_EQUAL_EPSILON))
|
||||
continue; // can't go out a coplanar face
|
||||
|
||||
c_portalcheck++;
|
||||
thread->stats.c_portalcheck++;
|
||||
|
||||
stack.portal = p;
|
||||
stack.next = NULL;
|
||||
|
|
@ -244,7 +238,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
|
|||
*/
|
||||
|
||||
/* Clip any part of the target portal behind the source portal */
|
||||
stack.pass = ClipStackWinding(p->winding.get(), stack, thread->pstack_head.portalplane);
|
||||
stack.pass = ClipStackWinding(thread->stats, p->winding.get(), stack, thread->pstack_head.portalplane);
|
||||
if (!stack.pass)
|
||||
continue;
|
||||
|
||||
|
|
@ -257,31 +251,31 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
|
|||
}
|
||||
|
||||
/* Clip any part of the target portal behind the pass portal */
|
||||
stack.pass = ClipStackWinding(stack.pass, stack, prevstack.portalplane);
|
||||
stack.pass = ClipStackWinding(thread->stats, stack.pass, stack, prevstack.portalplane);
|
||||
if (!stack.pass)
|
||||
continue;
|
||||
|
||||
/* Clip any part of the source portal in front of the target portal */
|
||||
stack.source = ClipStackWinding(prevstack.source, stack, backplane);
|
||||
stack.source = ClipStackWinding(thread->stats, prevstack.source, stack, backplane);
|
||||
if (!stack.source) {
|
||||
FreeStackWinding(stack.pass, stack);
|
||||
continue;
|
||||
}
|
||||
|
||||
c_portaltest++;
|
||||
thread->stats.c_portaltest++;
|
||||
|
||||
/* TEST 0 :: source -> pass -> target */
|
||||
if (vis_options.level.value() > 0) {
|
||||
if (stack.numseparators[0]) {
|
||||
for (j = 0; j < stack.numseparators[0]; j++) {
|
||||
stack.pass = ClipStackWinding(stack.pass, stack, stack.separators[0][j]);
|
||||
stack.pass = ClipStackWinding(thread->stats, stack.pass, stack, stack.separators[0][j]);
|
||||
if (!stack.pass)
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* Using prevstack source for separator cache correctness */
|
||||
ClipToSeparators(
|
||||
prevstack.source, thread->pstack_head.portalplane, prevstack.pass, stack.pass, 0, stack);
|
||||
thread->stats, prevstack.source, thread->pstack_head.portalplane, prevstack.pass, stack.pass, 0, stack);
|
||||
}
|
||||
if (!stack.pass) {
|
||||
FreeStackWinding(stack.source, stack);
|
||||
|
|
@ -293,13 +287,13 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
|
|||
if (vis_options.level.value() > 1) {
|
||||
if (stack.numseparators[1]) {
|
||||
for (j = 0; j < stack.numseparators[1]; j++) {
|
||||
stack.pass = ClipStackWinding(stack.pass, stack, stack.separators[1][j]);
|
||||
stack.pass = ClipStackWinding(thread->stats, stack.pass, stack, stack.separators[1][j]);
|
||||
if (!stack.pass)
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/* Using prevstack source for separator cache correctness */
|
||||
ClipToSeparators(prevstack.pass, prevstack.portalplane, prevstack.source, stack.pass, 1, stack);
|
||||
ClipToSeparators(thread->stats, prevstack.pass, prevstack.portalplane, prevstack.source, stack.pass, 1, stack);
|
||||
}
|
||||
if (!stack.pass) {
|
||||
FreeStackWinding(stack.source, stack);
|
||||
|
|
@ -309,7 +303,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
|
|||
|
||||
/* TEST 2 :: target -> pass -> source */
|
||||
if (vis_options.level.value() > 2) {
|
||||
ClipToSeparators(stack.pass, stack.portalplane, prevstack.pass, stack.source, 2, stack);
|
||||
ClipToSeparators(thread->stats, stack.pass, stack.portalplane, prevstack.pass, stack.source, 2, stack);
|
||||
if (!stack.source) {
|
||||
FreeStackWinding(stack.pass, stack);
|
||||
continue;
|
||||
|
|
@ -318,14 +312,14 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
|
|||
|
||||
/* TEST 3 :: pass -> target -> source */
|
||||
if (vis_options.level.value() > 3) {
|
||||
ClipToSeparators(prevstack.pass, prevstack.portalplane, stack.pass, stack.source, 3, stack);
|
||||
ClipToSeparators(thread->stats, prevstack.pass, prevstack.portalplane, stack.pass, stack.source, 3, stack);
|
||||
if (!stack.source) {
|
||||
FreeStackWinding(stack.pass, stack);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
c_portalpass++;
|
||||
thread->stats.c_portalpass++;
|
||||
|
||||
// flow through it for real
|
||||
RecursiveLeafFlow(p->leaf, thread, stack);
|
||||
|
|
@ -340,7 +334,7 @@ static void RecursiveLeafFlow(int leafnum, threaddata_t *thread, pstack_t &prevs
|
|||
PortalFlow
|
||||
===============
|
||||
*/
|
||||
void PortalFlow(visportal_t *p)
|
||||
visstats_t PortalFlow(visportal_t *p)
|
||||
{
|
||||
threaddata_t data{p->visbits};
|
||||
|
||||
|
|
@ -357,6 +351,8 @@ void PortalFlow(visportal_t *p)
|
|||
data.pstack_head.mightsee = &p->mightsee;
|
||||
|
||||
RecursiveLeafFlow(p->leaf, &data, data.pstack_head);
|
||||
|
||||
return data.stats;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
|||
66
vis/vis.cc
66
vis/vis.cc
|
|
@ -3,6 +3,7 @@
|
|||
#include <climits>
|
||||
#include <cstdint>
|
||||
#include <bit> // for std::countr_zero
|
||||
#include <numeric> // for std::accumulate
|
||||
|
||||
#include <vis/leafbits.hh>
|
||||
#include <vis/vis.hh>
|
||||
|
|
@ -24,11 +25,6 @@ int portalleafs_real; /* real no. of leafs after expanding PRT2 clusters. Not us
|
|||
std::vector<visportal_t> portals; // always numportals * 2; front and back
|
||||
std::vector<leaf_t> leafs;
|
||||
|
||||
int c_portaltest, c_portalpass, c_portalcheck, c_mightseeupdate;
|
||||
int c_noclip = 0;
|
||||
|
||||
bool showgetleaf = true;
|
||||
|
||||
static std::vector<uint8_t> vismap;
|
||||
|
||||
uint32_t originalvismapsize;
|
||||
|
|
@ -116,7 +112,7 @@ void FreeStackWinding(viswinding_t *&w, pstack_t &stack)
|
|||
is returned.
|
||||
==================
|
||||
*/
|
||||
viswinding_t *ClipStackWinding(viswinding_t *in, pstack_t &stack, const qplane3d &split)
|
||||
viswinding_t *ClipStackWinding(visstats_t &stats, viswinding_t *in, pstack_t &stack, const qplane3d &split)
|
||||
{
|
||||
vec_t *dists = (vec_t *)alloca(sizeof(vec_t) * (in->size() + 1));
|
||||
int *sides = (int *)alloca(sizeof(int) * (in->size() + 1));
|
||||
|
|
@ -216,7 +212,7 @@ viswinding_t *ClipStackWinding(viswinding_t *in, pstack_t &stack, const qplane3d
|
|||
|
||||
noclip:
|
||||
FreeStackWinding(neww, stack);
|
||||
c_noclip++;
|
||||
stats.c_noclip++;
|
||||
return in;
|
||||
}
|
||||
|
||||
|
|
@ -271,7 +267,7 @@ visportal_t *GetNextPortal(void)
|
|||
Called with the lock held.
|
||||
=============
|
||||
*/
|
||||
static void UpdateMightsee(const leaf_t &source, const leaf_t &dest)
|
||||
static void UpdateMightsee(visstats_t &stats, const leaf_t &source, const leaf_t &dest)
|
||||
{
|
||||
size_t leafnum = &dest - leafs.data();
|
||||
for (size_t i = 0; i < source.numportals; i++) {
|
||||
|
|
@ -282,7 +278,7 @@ static void UpdateMightsee(const leaf_t &source, const leaf_t &dest)
|
|||
if (p->mightsee[leafnum]) {
|
||||
p->mightsee[leafnum] = false;
|
||||
p->nummightsee--;
|
||||
c_mightseeupdate++;
|
||||
stats.c_mightseeupdate++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -297,7 +293,7 @@ static void UpdateMightsee(const leaf_t &source, const leaf_t &dest)
|
|||
Called with the lock held.
|
||||
=============
|
||||
*/
|
||||
static void PortalCompleted(visportal_t *completed)
|
||||
static void PortalCompleted(visstats_t &stats, visportal_t *completed)
|
||||
{
|
||||
int i, j, k, bit, numblocks;
|
||||
int leafnum;
|
||||
|
|
@ -349,7 +345,7 @@ static void PortalCompleted(visportal_t *completed)
|
|||
bit = std::countr_zero(changed);
|
||||
changed &= ~nth_bit(bit);
|
||||
leafnum = (j << leafbits_t::shift) + bit;
|
||||
UpdateMightsee(leafs[leafnum], myleaf);
|
||||
UpdateMightsee(stats, leafs[leafnum], myleaf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -365,7 +361,7 @@ static duration stateinterval;
|
|||
LeafThread
|
||||
==============
|
||||
*/
|
||||
void LeafThread(size_t)
|
||||
static visstats_t LeafThread()
|
||||
{
|
||||
visportal_t *p;
|
||||
|
||||
|
|
@ -380,14 +376,16 @@ void LeafThread(size_t)
|
|||
|
||||
p = GetNextPortal();
|
||||
if (!p)
|
||||
return;
|
||||
return {};
|
||||
|
||||
PortalFlow(p);
|
||||
visstats_t stats = PortalFlow(p);
|
||||
|
||||
PortalCompleted(p);
|
||||
PortalCompleted(stats, p);
|
||||
|
||||
logging::print(logging::flag::VERBOSE, "portal:{:4} mightsee:{:4} cansee:{:4}\n", (ptrdiff_t)(p - portals.data()),
|
||||
p->nummightsee, p->numcansee);
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -503,7 +501,7 @@ static void ClusterFlow(int clusternum, leafbits_t &buffer, mbsp_t *bsp)
|
|||
CalcPortalVis
|
||||
==================
|
||||
*/
|
||||
void CalcPortalVis(const mbsp_t *bsp)
|
||||
visstats_t CalcPortalVis(const mbsp_t *bsp)
|
||||
{
|
||||
// fastvis just uses mightsee for a very loose bound
|
||||
if (vis_options.fast.value()) {
|
||||
|
|
@ -511,7 +509,7 @@ void CalcPortalVis(const mbsp_t *bsp)
|
|||
p.visbits = p.mightsee;
|
||||
p.status = pstat_done;
|
||||
}
|
||||
return;
|
||||
return {};
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -525,14 +523,26 @@ void CalcPortalVis(const mbsp_t *bsp)
|
|||
}
|
||||
|
||||
portalIndex = startcount;
|
||||
logging::parallel_for(startcount, numportals * 2, LeafThread);
|
||||
|
||||
std::vector<visstats_t> stats_perportal;
|
||||
stats_perportal.resize(numportals * 2);
|
||||
|
||||
logging::parallel_for(startcount, numportals * 2, [&](size_t i) {
|
||||
stats_perportal[i] = LeafThread();
|
||||
});
|
||||
|
||||
const visstats_t stats = std::accumulate(stats_perportal.begin(),
|
||||
stats_perportal.end(),
|
||||
visstats_t{});
|
||||
|
||||
SaveVisState();
|
||||
|
||||
logging::print(logging::flag::VERBOSE, "portalcheck: {} portaltest: {} portalpass: {}\n", c_portalcheck,
|
||||
c_portaltest, c_portalpass);
|
||||
logging::print(logging::flag::VERBOSE, "c_vistest: {} c_mighttest: {} c_mightseeupdate {}\n", c_vistest,
|
||||
c_mighttest, c_mightseeupdate);
|
||||
logging::print(logging::flag::VERBOSE, "portalcheck: {} portaltest: {} portalpass: {}\n", stats.c_portalcheck,
|
||||
stats.c_portaltest, stats.c_portalpass);
|
||||
logging::print(logging::flag::VERBOSE, "c_vistest: {} c_mighttest: {} c_mightseeupdate {}\n", stats.c_vistest,
|
||||
stats.c_mighttest, stats.c_mightseeupdate);
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -540,7 +550,7 @@ void CalcPortalVis(const mbsp_t *bsp)
|
|||
CalcVis
|
||||
==================
|
||||
*/
|
||||
void CalcVis(mbsp_t *bsp)
|
||||
visstats_t CalcVis(mbsp_t *bsp)
|
||||
{
|
||||
int i;
|
||||
|
||||
|
|
@ -552,7 +562,7 @@ void CalcVis(mbsp_t *bsp)
|
|||
}
|
||||
|
||||
logging::print("Calculating Full Vis:\n");
|
||||
CalcPortalVis(bsp);
|
||||
auto stats = CalcPortalVis(bsp);
|
||||
|
||||
//
|
||||
// assemble the leaf vis lists by oring and compressing the portal lists
|
||||
|
|
@ -575,6 +585,8 @@ void CalcVis(mbsp_t *bsp)
|
|||
|
||||
logging::print("average leafs visible: {}\n", avg);
|
||||
}
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
// ===========================================================================
|
||||
|
|
@ -747,10 +759,10 @@ int vis_main(int argc, const char **argv)
|
|||
uncompressed.resize(portalleafs * leafbytes);
|
||||
}
|
||||
|
||||
CalcVis(&bsp);
|
||||
auto stats = CalcVis(&bsp);
|
||||
|
||||
logging::print("c_noclip: {}\n", c_noclip);
|
||||
logging::print("c_chains: {}\n", c_chains);
|
||||
logging::print("c_noclip: {}\n", stats.c_noclip);
|
||||
logging::print("c_chains: {}\n", stats.c_chains);
|
||||
|
||||
bsp.dvis.bits = std::move(vismap);
|
||||
bsp.dvis.bits.shrink_to_fit();
|
||||
|
|
|
|||
Loading…
Reference in New Issue