gh-104584: Move super-instruction special-casing to generator #106500

Merged
3 commits merged on Jul 7, 2023
9 changes: 9 additions & 0 deletions Python/opcode_metadata.h

(Generated file; the diff is not rendered by GitHub by default.)
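Although the generated header's diff is collapsed, the generator changes further down in this PR emit one #define per entry of the new OPARG_SIZES table, so the added lines are presumably along these lines (a sketch based on that code, not the rendered diff), plus the _PyOpcode_macro_expansion entries for the super-instructions handled by write_super_expansions() below:

    #define OPARG_FULL 0
    #define OPARG_CACHE_1 1
    #define OPARG_CACHE_2 2
    #define OPARG_CACHE_4 4
    #define OPARG_TOP 5
    #define OPARG_BOTTOM 6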

53 changes: 16 additions & 37 deletions Python/optimizer.c
@@ -411,44 +411,15 @@ translate_bytecode_to_trace(
     for (;;) {
         ADD_TO_TRACE(SAVE_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
         int opcode = instr->op.code;
-        uint64_t operand = instr->op.arg;
+        int oparg = instr->op.arg;
         int extras = 0;
         while (opcode == EXTENDED_ARG) {
             instr++;
             extras += 1;
             opcode = instr->op.code;
-            operand = (operand << 8) | instr->op.arg;
+            oparg = (oparg << 8) | instr->op.arg;
         }
         switch (opcode) {
-            case LOAD_FAST_LOAD_FAST:
-            case STORE_FAST_LOAD_FAST:
-            case STORE_FAST_STORE_FAST:
-            {
-                // Reserve space for two uops (+ SAVE_IP + EXIT_TRACE)
-                if (trace_length + 4 > max_length) {
-                    DPRINTF(1, "Ran out of space for LOAD_FAST_LOAD_FAST\n");
-                    goto done;
-                }
-                uint64_t oparg1 = operand >> 4;
-                uint64_t oparg2 = operand & 15;
-                switch (opcode) {
-                    case LOAD_FAST_LOAD_FAST:
-                        ADD_TO_TRACE(LOAD_FAST, oparg1);
-                        ADD_TO_TRACE(LOAD_FAST, oparg2);
-                        break;
-                    case STORE_FAST_LOAD_FAST:
-                        ADD_TO_TRACE(STORE_FAST, oparg1);
-                        ADD_TO_TRACE(LOAD_FAST, oparg2);
-                        break;
-                    case STORE_FAST_STORE_FAST:
-                        ADD_TO_TRACE(STORE_FAST, oparg1);
-                        ADD_TO_TRACE(STORE_FAST, oparg2);
-                        break;
-                    default:
-                        Py_FatalError("Missing case");
-                }
-                break;
-            }
             default:
             {
                 const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
@@ -462,9 +433,11 @@ translate_bytecode_to_trace(
                         goto done;
                     }
                     for (int i = 0; i < nuops; i++) {
+                        uint64_t operand;
                         int offset = expansion->uops[i].offset;
                         switch (expansion->uops[i].size) {
-                            case 0:
+                            case OPARG_FULL:
+                                operand = oparg;
                                 if (extras && OPCODE_HAS_JUMP(opcode)) {
                                     if (opcode == JUMP_BACKWARD_NO_INTERRUPT) {
                                         operand -= extras;
@@ -475,19 +448,25 @@
                                     }
                                 }
                                 break;
-                            case 1:
+                            case OPARG_CACHE_1:
                                 operand = read_u16(&instr[offset].cache);
                                 break;
-                            case 2:
+                            case OPARG_CACHE_2:
                                 operand = read_u32(&instr[offset].cache);
                                 break;
-                            case 4:
+                            case OPARG_CACHE_4:
                                 operand = read_u64(&instr[offset].cache);
                                 break;
+                            case OPARG_TOP: // First half of super-instr
+                                operand = oparg >> 4;
+                                break;
+                            case OPARG_BOTTOM: // Second half of super-instr
+                                operand = oparg & 0xF;
+                                break;
                             default:
                                 fprintf(stderr,
-                                        "opcode=%d, operand=%" PRIu64 "; nuops=%d, i=%d; size=%d, offset=%d\n",
-                                        opcode, operand, nuops, i,
+                                        "opcode=%d, oparg=%d; nuops=%d, i=%d; size=%d, offset=%d\n",
+                                        opcode, oparg, nuops, i,
                                         expansion->uops[i].size,
                                         expansion->uops[i].offset);
                                 Py_FatalError("garbled expansion");
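To make the two new cases concrete: a super-instruction packs both operands into a single oparg, with the first instruction's operand in everything above the bottom 4 bits and the second instruction's operand in the bottom 4 bits. A minimal standalone C sketch of the OPARG_TOP / OPARG_BOTTOM split (illustrative only, not part of the patch; the instruction name and local indices are made up):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        int oparg = (2 << 4) | 1;       /* e.g. STORE_FAST_LOAD_FAST on locals 2 and 1 */
        uint64_t first = oparg >> 4;    /* OPARG_TOP: operand of the first uop */
        uint64_t second = oparg & 0xF;  /* OPARG_BOTTOM: operand of the second uop */
        assert(first == 2 && second == 1);
        return 0;
    }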
61 changes: 59 additions & 2 deletions Tools/cases_generator/generate_cases.py
@@ -40,6 +40,17 @@
 UNUSED = "unused"
 BITS_PER_CODE_UNIT = 16
 
+# Constants used instead of size for macro expansions.
+# Note: 1, 2, 4 must match actual cache entry sizes.
+OPARG_SIZES = {
+    "OPARG_FULL": 0,
+    "OPARG_CACHE_1": 1,
+    "OPARG_CACHE_2": 2,
+    "OPARG_CACHE_4": 4,
+    "OPARG_TOP": 5,
+    "OPARG_BOTTOM": 6,
+}
+
 RESERVED_WORDS = {
     "co_consts" : "Use FRAME_CO_CONSTS.",
     "co_names": "Use FRAME_CO_NAMES.",
@@ -1213,7 +1224,10 @@ def write_metadata(self) -> None:
self.out.emit("struct { int16_t uop; int8_t size; int8_t offset; } uops[8];")
self.out.emit("")

for key, value in OPARG_SIZES.items():
self.out.emit(f"#define {key} {value}")
self.out.emit("")

self.out.emit("#define OPCODE_METADATA_FMT(OP) "
"(_PyOpcode_opcode_metadata[(OP)].instr_format)")
self.out.emit("#define SAME_OPCODE_METADATA(OP1, OP2) \\")
@@ -1263,6 +1277,9 @@ def write_metadata(self) -> None:
                             # Construct a dummy Component -- input/output mappings are not used
                             part = Component(instr, [], [], instr.active_caches)
                             self.write_macro_expansions(instr.name, [part])
+                        elif instr.kind == "inst" and variable_used(instr.inst, "oparg1"):
+                            assert variable_used(instr.inst, "oparg2"), "Half super-instr?"
+                            self.write_super_expansions(instr.name)
                     case parser.Macro():
                         mac = self.macro_instrs[thing.name]
                         self.write_macro_expansions(mac.name, mac.parts)
@@ -1342,18 +1359,58 @@ def write_macro_expansions(self, name: str, parts: MacroParts) -> None:
print(f"NOTE: Part {part.instr.name} of {name} is not a viable uop")
return
if part.instr.instr_flags.HAS_ARG_FLAG or not part.active_caches:
size, offset = 0, 0
size, offset = OPARG_SIZES["OPARG_FULL"], 0
else:
# If this assert triggers, is_viable_uops() lied
assert len(part.active_caches) == 1, (name, part.instr.name)
cache = part.active_caches[0]
size, offset = cache.effect.size, cache.offset
expansions.append((part.instr.name, size, offset))
assert len(expansions) > 0, f"Macro {name} has empty expansion?!"
self.write_expansions(name, expansions)

+    def write_super_expansions(self, name: str) -> None:
+        """Write special macro expansions for super-instructions.
+
+        If you get an assertion failure here, you probably have accidentally
+        violated one of the assumptions here.
+
+        - A super-instruction's name is of the form FIRST_SECOND where
+          FIRST and SECOND are regular instructions whose name has the
+          form FOO_BAR. Thus, there must be exactly 3 underscores.
+          Example: LOAD_CONST_STORE_FAST.
+
+        - A super-instruction's body uses `oparg1` and `oparg2`, and no
+          other instruction's body uses those variable names.
+
+        - A super-instruction has no active (used) cache entries.
+
+        In the expansion, the first instruction's operand is all but the
+        bottom 4 bits of the super-instruction's oparg, and the second
+        instruction's operand is the bottom 4 bits. We use the special
+        size codes OPARG_TOP and OPARG_BOTTOM for these.
+        """
+        pieces = name.split("_")
+        assert len(pieces) == 4, f"{name} doesn't look like a super-instr"
+        name1 = "_".join(pieces[:2])
+        name2 = "_".join(pieces[2:])
+        assert name1 in self.instrs, f"{name1} doesn't match any instr"
+        assert name2 in self.instrs, f"{name2} doesn't match any instr"
+        instr1 = self.instrs[name1]
+        instr2 = self.instrs[name2]
+        assert not instr1.active_caches, f"{name1} has active caches"
+        assert not instr2.active_caches, f"{name2} has active caches"
+        expansions = [
+            (name1, OPARG_SIZES["OPARG_TOP"], 0),
+            (name2, OPARG_SIZES["OPARG_BOTTOM"], 0),
+        ]
+        self.write_expansions(name, expansions)
+
+    def write_expansions(self, name: str, expansions: list[tuple[str, int, int]]) -> None:
         pieces = [f"{{ {name}, {size}, {offset} }}" for name, size, offset in expansions]
         self.out.emit(
             f"[{name}] = "
-            f"{{ .nuops = {len(expansions)}, .uops = {{ {', '.join(pieces)} }} }},"
+            f"{{ .nuops = {len(pieces)}, .uops = {{ {', '.join(pieces)} }} }},"
         )
 
     def emit_metadata_entry(
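Net effect: the hand-written special case removed from optimizer.c is now driven by generated data. For a super-instruction such as STORE_FAST_STORE_FAST, write_super_expansions() should produce an expansion entry roughly like the following (a sketch pieced together from write_expansions() and OPARG_SIZES above, where 5 is OPARG_TOP and 6 is OPARG_BOTTOM; the actual generated file may differ in layout). translate_bytecode_to_trace() then turns this entry into two STORE_FAST uops carrying the top and bottom halves of the oparg:

    [STORE_FAST_STORE_FAST] = { .nuops = 2, .uops = { { STORE_FAST, 5, 0 }, { STORE_FAST, 6, 0 } } },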