Skip to content

Commit 29d4e04

Browse files
committed
CPU/PGXP: Use bit math for flags instead of union
1 parent 82f3e17 commit 29d4e04

File tree

2 files changed

+71
-50
lines changed

2 files changed

+71
-50
lines changed

src/core/cpu_core.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,14 @@ struct PGXP_value
5656
float y;
5757
float z;
5858
u32 value;
59-
union
59+
u32 flags;
60+
61+
// Sets (valid == true) or clears (valid == false) bit `comp` of `flags`; all other bits are left unchanged.
// NOTE(review): `comp` is used directly as a shift count, so callers are assumed to pass comp < 32 - confirm.
ALWAYS_INLINE void SetValidComp(u32 comp, bool valid)
6062
{
61-
u32 flags;
62-
u8 compFlags[4];
63-
u16 halfFlags[2];
64-
};
63+
// Mask out the old bit first, then OR in the new 0/1 value at the same position.
flags = (flags & ~(1u << comp)) | (static_cast<u32>(valid) << comp);
64+
}
65+
66+
// Extracts bit `comp` of `flags` (0 or 1) and returns it via ConvertToBoolUnchecked().
// NOTE(review): ConvertToBoolUnchecked is defined elsewhere; presumably it reinterprets the 0/1 value as bool - confirm.
ALWAYS_INLINE bool GetValidComp(u32 comp) const { return ConvertToBoolUnchecked((flags >> comp) & 1); }
6567
};
6668

6769
struct State

src/core/cpu_pgxp.cpp

Lines changed: 64 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,13 @@ enum : u32
3232
PGXP_MEM_SCRATCH_OFFSET = Bus::RAM_8MB_SIZE / 4
3333
};
3434

35-
#define NONE 0
3635
// All 32 bits set; XOR-ed with VALID_ALL below to build INV_VALID_ALL.
#define ALL 0xFFFFFFFF
37-
#define VALID 1
38-
#define VALID_0 (VALID << 0)
39-
#define VALID_1 (VALID << 8)
40-
#define VALID_2 (VALID << 16)
41-
#define VALID_3 (VALID << 24)
36+
// Validity masks for PGXP_value::flags: one bit per component index, matching the bit positions
// used by PGXP_value::SetValidComp()/GetValidComp() (bit 0 = x, bit 1 = y, bit 2 = z).
#define VALID_0 (1 << 0)
37+
#define VALID_1 (1 << 1)
38+
#define VALID_2 (1 << 2)
4239
// Combined masks: x and y together, and x, y and z together.
#define VALID_01 (VALID_0 | VALID_1)
4340
#define VALID_012 (VALID_0 | VALID_1 | VALID_2)
44-
#define VALID_ALL (VALID_0 | VALID_1 | VALID_2 | VALID_3)
41+
#define VALID_ALL (VALID_0 | VALID_1 | VALID_2)
4542
// Every bit that is NOT part of VALID_ALL.
#define INV_VALID_ALL (ALL ^ VALID_ALL)
4643

4744
union psx_value
@@ -85,6 +82,9 @@ static void CPU_BITWISE(u32 instr, u32 rdVal, u32 rsVal, u32 rtVal);
8582
static void WriteMem(const PGXP_value* value, u32 addr);
8683
static void WriteMem16(const PGXP_value* src, u32 addr);
8784

85+
// Helpers for propagating the z component and its validity bit (VALID_2) between PGXP values;
// both are defined further down in this file.
static void CopyZIfMissing(PGXP_value& dst, const PGXP_value& src);
86+
static void SelectZ(PGXP_value& dst, const PGXP_value& src1, const PGXP_value& src2);
87+
8888
#ifdef LOG_VALUES
8989
static void LogInstruction(u32 pc, u32 instr);
9090
static void LogValue(const char* name, u32 rval, const PGXP_value* val);
@@ -107,8 +107,8 @@ static void LogValueStr(SmallStringBase& str, const char* name, u32 rval, const
107107
#endif
108108
// clang-format on
109109

110-
static const PGXP_value PGXP_value_invalid = {0.f, 0.f, 0.f, 0, {0}};
111-
static const PGXP_value PGXP_value_zero = {0.f, 0.f, 0.f, 0, {VALID_ALL}};
110+
// All-zero value whose flags field is 0, i.e. no component is marked valid.
static const PGXP_value PGXP_value_invalid = {0.f, 0.f, 0.f, 0, 0};
111+
// All-zero value whose flags field is VALID_ALL, i.e. every component is marked valid.
static const PGXP_value PGXP_value_zero = {0.f, 0.f, 0.f, 0, VALID_ALL};
112112

113113
static PGXP_value* s_mem = nullptr;
114114
static PGXP_value* s_vertex_cache = nullptr;
@@ -303,19 +303,19 @@ ALWAYS_INLINE_RELEASE void CPU::PGXP::ValidateAndCopyMem16(PGXP_value* dest, u32
303303
if (hiword)
304304
{
305305
dest->x = dest->y;
306-
dest->compFlags[0] = dest->compFlags[1];
306+
dest->SetValidComp(0, dest->GetValidComp(1));
307307
}
308308

309309
// only set y as valid if x is also valid.. don't want to make fake values
310-
if (dest->compFlags[0] == VALID)
310+
if (dest->GetValidComp(0))
311311
{
312312
dest->y = (dest->x < 0) ? -1.f * sign : 0.f;
313-
dest->compFlags[1] = VALID;
313+
dest->SetValidComp(1, true);
314314
}
315315
else
316316
{
317317
dest->y = 0.0f;
318-
dest->compFlags[1] = 0;
318+
dest->SetValidComp(1, false);
319319
}
320320

321321
dest->value = value;
@@ -340,24 +340,39 @@ ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem16(const PGXP_value* src, u32 addr
340340
if (hiword)
341341
{
342342
dest->y = src->x;
343-
dest->compFlags[1] = src->compFlags[0];
343+
dest->SetValidComp(1, src->GetValidComp(0));
344344
dest->value = (dest->value & UINT32_C(0x0000FFFF)) | (src->value << 16);
345345
}
346346
else
347347
{
348348
dest->x = src->x;
349-
dest->compFlags[0] = src->compFlags[0];
349+
dest->SetValidComp(0, src->GetValidComp(0));
350350
dest->value = (dest->value & UINT32_C(0xFFFF0000)) | (src->value & UINT32_C(0x0000FFFF));
351351
}
352352

353353
// overwrite z/w if valid
354-
if (src->compFlags[2] == VALID)
354+
if (src->GetValidComp(2))
355355
{
356356
dest->z = src->z;
357-
dest->compFlags[2] = src->compFlags[2];
357+
dest->SetValidComp(2, true);
358358
}
359359
}
360360

361+
// Gives `dst` a z component taken from `src`, but only when `dst` does not already have a valid one.
//   dst: value to update in place; left untouched if its component-2 (z) bit is already set.
//   src: value supplying z and its VALID_2 bit.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CopyZIfMissing(PGXP_value& dst, const PGXP_value& src)
362+
{
363+
// dst already carries a valid z - keep it.
if (dst.GetValidComp(2))
364+
return;
365+
366+
// z is copied unconditionally; dst only becomes z-valid if src's VALID_2 bit is set.
dst.z = src.z;
367+
dst.flags |= (src.flags & VALID_2);
368+
}
369+
370+
// Chooses a z component for `dst` from two sources, preferring `src1`.
//   dst.z becomes src1.z when src1's component-2 (z) bit is set, otherwise src2.z
//   (src2.z is used even if src2's z bit is not set).
//   VALID_2 is OR-ed into dst.flags when either source has it; a bit already set in dst is never cleared.
ALWAYS_INLINE_RELEASE void CPU::PGXP::SelectZ(PGXP_value& dst, const PGXP_value& src1, const PGXP_value& src2)
371+
{
372+
dst.z = src1.GetValidComp(2) ? src1.z : src2.z;
373+
dst.flags |= ((src1.flags | src2.flags) & VALID_2);
374+
}
375+
361376
#ifdef LOG_VALUES
362377
void CPU::PGXP::LogInstruction(u32 pc, u32 instr)
363378
{
@@ -929,13 +944,13 @@ void CPU::PGXP::CPU_ADD(u32 instr, u32 rsVal, u32 rtVal)
929944

930945
// TODO: decide which "z/w" component to use
931946

932-
ret.halfFlags[0] &= g_state.pgxp_gpr[rt(instr)].halfFlags[0];
947+
ret.flags &= (g_state.pgxp_gpr[rt(instr)].flags & VALID_01);
933948
}
934949

935950
if (!(ret.flags & VALID_2) && (g_state.pgxp_gpr[rt(instr)].flags & VALID_2))
936951
{
937952
ret.z = g_state.pgxp_gpr[rt(instr)].z;
938-
ret.flags |= VALID_2;
953+
ret.SetValidComp(2, true);
939954
}
940955

941956
ret.value = rsVal + rtVal;
@@ -974,7 +989,7 @@ void CPU::PGXP::CPU_SUB(u32 instr, u32 rsVal, u32 rtVal)
974989
// truncate on overflow/underflow
975990
ret.y += (ret.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (ret.y < SHRT_MIN) ? USHRT_MAX + 1 : 0.f;
976991

977-
ret.halfFlags[0] &= g_state.pgxp_gpr[rt(instr)].halfFlags[0];
992+
ret.flags &= (g_state.pgxp_gpr[rt(instr)].flags & VALID_01);
978993

979994
ret.value = rsVal - rtVal;
980995

@@ -1020,17 +1035,17 @@ ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(u32 instr, u32 rdVal, u32 rsVa
10201035
else if (vald.w.l == vals.w.l)
10211036
{
10221037
ret.x = g_state.pgxp_gpr[rs(instr)].x;
1023-
ret.compFlags[0] = g_state.pgxp_gpr[rs(instr)].compFlags[0];
1038+
ret.SetValidComp(0, g_state.pgxp_gpr[rs(instr)].GetValidComp(0));
10241039
}
10251040
else if (vald.w.l == valt.w.l)
10261041
{
10271042
ret.x = g_state.pgxp_gpr[rt(instr)].x;
1028-
ret.compFlags[0] = g_state.pgxp_gpr[rt(instr)].compFlags[0];
1043+
ret.SetValidComp(0, g_state.pgxp_gpr[rt(instr)].GetValidComp(0));
10291044
}
10301045
else
10311046
{
10321047
ret.x = (float)vald.sw.l;
1033-
ret.compFlags[0] = VALID;
1048+
ret.SetValidComp(0, true);
10341049
}
10351050

10361051
if (vald.w.h == 0)
@@ -1040,17 +1055,17 @@ ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(u32 instr, u32 rdVal, u32 rsVa
10401055
else if (vald.w.h == vals.w.h)
10411056
{
10421057
ret.y = g_state.pgxp_gpr[rs(instr)].y;
1043-
ret.compFlags[1] &= g_state.pgxp_gpr[rs(instr)].compFlags[1];
1058+
ret.SetValidComp(1, g_state.pgxp_gpr[rs(instr)].GetValidComp(1));
10441059
}
10451060
else if (vald.w.h == valt.w.h)
10461061
{
10471062
ret.y = g_state.pgxp_gpr[rt(instr)].y;
1048-
ret.compFlags[1] &= g_state.pgxp_gpr[rt(instr)].compFlags[1];
1063+
ret.SetValidComp(1, g_state.pgxp_gpr[rt(instr)].GetValidComp(1));
10491064
}
10501065
else
10511066
{
10521067
ret.y = (float)vald.sw.h;
1053-
ret.compFlags[1] = VALID;
1068+
ret.SetValidComp(1, true);
10541069
}
10551070

10561071
// iCB Hack: Force validity if even one half is valid
@@ -1059,20 +1074,20 @@ ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(u32 instr, u32 rdVal, u32 rsVa
10591074
// /iCB Hack
10601075

10611076
// Get a valid W
1062-
if ((g_state.pgxp_gpr[rs(instr)].flags & VALID_2) == VALID_2)
1077+
if (g_state.pgxp_gpr[rs(instr)].GetValidComp(2))
10631078
{
10641079
ret.z = g_state.pgxp_gpr[rs(instr)].z;
1065-
ret.compFlags[2] = g_state.pgxp_gpr[rs(instr)].compFlags[2];
1080+
ret.SetValidComp(2, true);
10661081
}
1067-
else if ((g_state.pgxp_gpr[rt(instr)].flags & VALID_2) == VALID_2)
1082+
else if (g_state.pgxp_gpr[rt(instr)].GetValidComp(2))
10681083
{
10691084
ret.z = g_state.pgxp_gpr[rt(instr)].z;
1070-
ret.compFlags[2] = g_state.pgxp_gpr[rt(instr)].compFlags[2];
1085+
ret.SetValidComp(2, true);
10711086
}
10721087
else
10731088
{
10741089
ret.z = 0.0f;
1075-
ret.compFlags[2] = 0;
1090+
ret.SetValidComp(2, false);
10761091
}
10771092

10781093
ret.value = rdVal;
@@ -1134,7 +1149,7 @@ void CPU::PGXP::CPU_SLT(u32 instr, u32 rsVal, u32 rtVal)
11341149

11351150
ret = g_state.pgxp_gpr[rs(instr)];
11361151
ret.y = 0.f;
1137-
ret.compFlags[1] = VALID;
1152+
ret.SetValidComp(1, true);
11381153

11391154
ret.x = (g_state.pgxp_gpr[rs(instr)].y < g_state.pgxp_gpr[rt(instr)].y) ? 1.f :
11401155
(f16Unsign(g_state.pgxp_gpr[rs(instr)].x) < f16Unsign(g_state.pgxp_gpr[rt(instr)].x)) ? 1.f :
@@ -1163,7 +1178,7 @@ void CPU::PGXP::CPU_SLTU(u32 instr, u32 rsVal, u32 rtVal)
11631178

11641179
ret = g_state.pgxp_gpr[rs(instr)];
11651180
ret.y = 0.f;
1166-
ret.compFlags[1] = VALID;
1181+
ret.SetValidComp(1, true);
11671182

11681183
ret.x = (f16Unsign(g_state.pgxp_gpr[rs(instr)].y) < f16Unsign(g_state.pgxp_gpr[rt(instr)].y)) ? 1.f :
11691184
(f16Unsign(g_state.pgxp_gpr[rs(instr)].x) < f16Unsign(g_state.pgxp_gpr[rt(instr)].x)) ? 1.f :
@@ -1193,10 +1208,11 @@ void CPU::PGXP::CPU_MULT(u32 instr, u32 rsVal, u32 rtVal)
11931208
MakeValid(&g_state.pgxp_gpr[rt(instr)], rtVal);
11941209
}
11951210

1196-
g_state.pgxp_gpr[static_cast<u8>(Reg::lo)] = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)] = g_state.pgxp_gpr[rs(instr)];
1211+
g_state.pgxp_gpr[static_cast<u8>(Reg::lo)] = g_state.pgxp_gpr[rs(instr)];
1212+
CopyZIfMissing(g_state.pgxp_gpr[static_cast<u8>(Reg::lo)], g_state.pgxp_gpr[rs(instr)]);
11971213

1198-
g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].halfFlags[0] = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].halfFlags[0] =
1199-
(g_state.pgxp_gpr[rs(instr)].halfFlags[0] & g_state.pgxp_gpr[rt(instr)].halfFlags[0]);
1214+
// Z/valid is the same
1215+
g_state.pgxp_gpr[static_cast<u8>(Reg::hi)] = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
12001216

12011217
double xx, xy, yx, yy;
12021218
double lx = 0, ly = 0, hx = 0, hy = 0;
@@ -1245,10 +1261,11 @@ void CPU::PGXP::CPU_MULTU(u32 instr, u32 rsVal, u32 rtVal)
12451261
MakeValid(&g_state.pgxp_gpr[rt(instr)], rtVal);
12461262
}
12471263

1248-
g_state.pgxp_gpr[static_cast<u8>(Reg::lo)] = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)] = g_state.pgxp_gpr[rs(instr)];
1264+
g_state.pgxp_gpr[static_cast<u8>(Reg::lo)] = g_state.pgxp_gpr[rs(instr)];
1265+
CopyZIfMissing(g_state.pgxp_gpr[static_cast<u8>(Reg::lo)], g_state.pgxp_gpr[rs(instr)]);
12491266

1250-
g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].halfFlags[0] = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].halfFlags[0] =
1251-
(g_state.pgxp_gpr[rs(instr)].halfFlags[0] & g_state.pgxp_gpr[rt(instr)].halfFlags[0]);
1267+
// Z/valid is the same
1268+
g_state.pgxp_gpr[static_cast<u8>(Reg::hi)] = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
12521269

12531270
double xx, xy, yx, yy;
12541271
double lx = 0, ly = 0, hx = 0, hy = 0;
@@ -1298,10 +1315,11 @@ void CPU::PGXP::CPU_DIV(u32 instr, u32 rsVal, u32 rtVal)
12981315
MakeValid(&g_state.pgxp_gpr[rt(instr)], rtVal);
12991316
}
13001317

1301-
g_state.pgxp_gpr[static_cast<u8>(Reg::lo)] = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)] = g_state.pgxp_gpr[rs(instr)];
1318+
g_state.pgxp_gpr[static_cast<u8>(Reg::lo)] = g_state.pgxp_gpr[rs(instr)];
1319+
CopyZIfMissing(g_state.pgxp_gpr[static_cast<u8>(Reg::lo)], g_state.pgxp_gpr[rs(instr)]);
13021320

1303-
g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].halfFlags[0] = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].halfFlags[0] =
1304-
(g_state.pgxp_gpr[rs(instr)].halfFlags[0] & g_state.pgxp_gpr[rt(instr)].halfFlags[0]);
1321+
// Z/valid is the same
1322+
g_state.pgxp_gpr[static_cast<u8>(Reg::hi)] = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
13051323

13061324
double vs = f16Unsign(g_state.pgxp_gpr[rs(instr)].x) + (g_state.pgxp_gpr[rs(instr)].y) * (double)(1 << 16);
13071325
double vt = f16Unsign(g_state.pgxp_gpr[rt(instr)].x) + (g_state.pgxp_gpr[rt(instr)].y) * (double)(1 << 16);
@@ -1354,10 +1372,11 @@ void CPU::PGXP::CPU_DIVU(u32 instr, u32 rsVal, u32 rtVal)
13541372
MakeValid(&g_state.pgxp_gpr[rt(instr)], rtVal);
13551373
}
13561374

1357-
g_state.pgxp_gpr[static_cast<u8>(Reg::lo)] = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)] = g_state.pgxp_gpr[rs(instr)];
1375+
g_state.pgxp_gpr[static_cast<u8>(Reg::lo)] = g_state.pgxp_gpr[rs(instr)];
1376+
CopyZIfMissing(g_state.pgxp_gpr[static_cast<u8>(Reg::lo)], g_state.pgxp_gpr[rs(instr)]);
13581377

1359-
g_state.pgxp_gpr[static_cast<u8>(Reg::lo)].halfFlags[0] = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)].halfFlags[0] =
1360-
(g_state.pgxp_gpr[rs(instr)].halfFlags[0] & g_state.pgxp_gpr[rt(instr)].halfFlags[0]);
1378+
// Z/valid is the same
1379+
g_state.pgxp_gpr[static_cast<u8>(Reg::hi)] = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
13611380

13621381
double vs = f16Unsign(g_state.pgxp_gpr[rs(instr)].x) + f16Unsign(g_state.pgxp_gpr[rs(instr)].y) * (double)(1 << 16);
13631382
double vt = f16Unsign(g_state.pgxp_gpr[rt(instr)].x) + f16Unsign(g_state.pgxp_gpr[rt(instr)].y) * (double)(1 << 16);

0 commit comments

Comments
 (0)