From 11038c56ad80dde2cdf65190e78a59d579c94b3a Mon Sep 17 00:00:00 2001
From: Guido van Rossum
Date: Fri, 7 Jul 2023 10:42:10 -0700
Subject: [PATCH] gh-104584: Move super-instruction special-casing to generator (#106500)

Instead of special-casing specific instructions, we add a few more
special values to the 'size' field of expansions, so in the future we
can automatically handle additional super-instructions in the generator.
---
 Python/opcode_metadata.h                |  9 ++++
 Python/optimizer.c                      | 53 +++++++--------
 Tools/cases_generator/generate_cases.py | 61 ++++++++++++++++++++++++-
 3 files changed, 84 insertions(+), 39 deletions(-)

diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h
index ac86a4abd9c1b3..d29f7216ea65e9 100644
--- a/Python/opcode_metadata.h
+++ b/Python/opcode_metadata.h
@@ -934,6 +934,12 @@ struct opcode_macro_expansion {
     struct { int16_t uop; int8_t size; int8_t offset; } uops[8];
 };
 
+#define OPARG_FULL 0
+#define OPARG_CACHE_1 1
+#define OPARG_CACHE_2 2
+#define OPARG_CACHE_4 4
+#define OPARG_TOP 5
+#define OPARG_BOTTOM 6
 
 #define OPCODE_METADATA_FMT(OP) (_PyOpcode_opcode_metadata[(OP)].instr_format)
 #define SAME_OPCODE_METADATA(OP1, OP2) \
@@ -1165,8 +1171,11 @@ const struct opcode_macro_expansion _PyOpcode_macro_expansion[256] = {
     [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { LOAD_FAST_CHECK, 0, 0 } } },
     [LOAD_FAST] = { .nuops = 1, .uops = { { LOAD_FAST, 0, 0 } } },
     [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { LOAD_FAST_AND_CLEAR, 0, 0 } } },
+    [LOAD_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { LOAD_FAST, 5, 0 }, { LOAD_FAST, 6, 0 } } },
     [LOAD_CONST] = { .nuops = 1, .uops = { { LOAD_CONST, 0, 0 } } },
     [STORE_FAST] = { .nuops = 1, .uops = { { STORE_FAST, 0, 0 } } },
+    [STORE_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { STORE_FAST, 5, 0 }, { LOAD_FAST, 6, 0 } } },
+    [STORE_FAST_STORE_FAST] = { .nuops = 2, .uops = { { STORE_FAST, 5, 0 }, { STORE_FAST, 6, 0 } } },
     [POP_TOP] = { .nuops = 1, .uops = { { POP_TOP, 0, 0 } } },
     [PUSH_NULL] = { .nuops = 1, .uops = { { PUSH_NULL, 0, 0 } } },
     [END_FOR] = { .nuops = 2, .uops = { { POP_TOP, 0, 0 }, { POP_TOP, 0, 0 } } },
diff --git a/Python/optimizer.c b/Python/optimizer.c
index db117bb180c1c8..2870f2fd05052e 100644
--- a/Python/optimizer.c
+++ b/Python/optimizer.c
@@ -411,44 +411,15 @@ translate_bytecode_to_trace(
     for (;;) {
         ADD_TO_TRACE(SAVE_IP, (int)(instr - (_Py_CODEUNIT *)code->co_code_adaptive));
         int opcode = instr->op.code;
-        uint64_t operand = instr->op.arg;
+        int oparg = instr->op.arg;
         int extras = 0;
         while (opcode == EXTENDED_ARG) {
             instr++;
             extras += 1;
             opcode = instr->op.code;
-            operand = (operand << 8) | instr->op.arg;
+            oparg = (oparg << 8) | instr->op.arg;
         }
         switch (opcode) {
-            case LOAD_FAST_LOAD_FAST:
-            case STORE_FAST_LOAD_FAST:
-            case STORE_FAST_STORE_FAST:
-            {
-                // Reserve space for two uops (+ SAVE_IP + EXIT_TRACE)
-                if (trace_length + 4 > max_length) {
-                    DPRINTF(1, "Ran out of space for LOAD_FAST_LOAD_FAST\n");
-                    goto done;
-                }
-                uint64_t oparg1 = operand >> 4;
-                uint64_t oparg2 = operand & 15;
-                switch (opcode) {
-                    case LOAD_FAST_LOAD_FAST:
-                        ADD_TO_TRACE(LOAD_FAST, oparg1);
-                        ADD_TO_TRACE(LOAD_FAST, oparg2);
-                        break;
-                    case STORE_FAST_LOAD_FAST:
-                        ADD_TO_TRACE(STORE_FAST, oparg1);
-                        ADD_TO_TRACE(LOAD_FAST, oparg2);
-                        break;
-                    case STORE_FAST_STORE_FAST:
-                        ADD_TO_TRACE(STORE_FAST, oparg1);
-                        ADD_TO_TRACE(STORE_FAST, oparg2);
-                        break;
-                    default:
-                        Py_FatalError("Missing case");
-                }
-                break;
-            }
             default:
             {
                 const struct opcode_macro_expansion *expansion = &_PyOpcode_macro_expansion[opcode];
@@ -462,9 +433,11 @@ translate_bytecode_to_trace(
                         goto done;
                     }
                     for (int i = 0; i < nuops; i++) {
+                        uint64_t operand;
                         int offset = expansion->uops[i].offset;
                         switch (expansion->uops[i].size) {
-                            case 0:
+                            case OPARG_FULL:
+                                operand = oparg;
                                 if (extras && OPCODE_HAS_JUMP(opcode)) {
                                     if (opcode == JUMP_BACKWARD_NO_INTERRUPT) {
                                         operand -= extras;
@@ -475,19 +448,25 @@ translate_bytecode_to_trace(
                                     }
                                 }
                                 break;
-                            case 1:
+                            case OPARG_CACHE_1:
                                 operand = read_u16(&instr[offset].cache);
                                 break;
-                            case 2:
+                            case OPARG_CACHE_2:
                                 operand = read_u32(&instr[offset].cache);
                                 break;
-                            case 4:
+                            case OPARG_CACHE_4:
                                 operand = read_u64(&instr[offset].cache);
                                 break;
+                            case OPARG_TOP: // First half of super-instr
+                                operand = oparg >> 4;
+                                break;
+                            case OPARG_BOTTOM: // Second half of super-instr
+                                operand = oparg & 0xF;
+                                break;
                             default:
                                 fprintf(stderr,
-                                        "opcode=%d, operand=%" PRIu64 "; nuops=%d, i=%d; size=%d, offset=%d\n",
-                                        opcode, operand, nuops, i,
+                                        "opcode=%d, oparg=%d; nuops=%d, i=%d; size=%d, offset=%d\n",
+                                        opcode, oparg, nuops, i,
                                         expansion->uops[i].size, expansion->uops[i].offset);
                                 Py_FatalError("garbled expansion");
                         }
diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py
index a90abfe20c1739..632834ce231664 100644
--- a/Tools/cases_generator/generate_cases.py
+++ b/Tools/cases_generator/generate_cases.py
@@ -40,6 +40,17 @@
 UNUSED = "unused"
 BITS_PER_CODE_UNIT = 16
 
+# Constants used instead of size for macro expansions.
+# Note: 1, 2, 4 must match actual cache entry sizes.
+OPARG_SIZES = {
+    "OPARG_FULL": 0,
+    "OPARG_CACHE_1": 1,
+    "OPARG_CACHE_2": 2,
+    "OPARG_CACHE_4": 4,
+    "OPARG_TOP": 5,
+    "OPARG_BOTTOM": 6,
+}
+
 RESERVED_WORDS = {
     "co_consts" : "Use FRAME_CO_CONSTS.",
     "co_names": "Use FRAME_CO_NAMES.",
@@ -1213,7 +1224,10 @@ def write_metadata(self) -> None:
         self.out.emit("struct { int16_t uop; int8_t size; int8_t offset; } uops[8];")
         self.out.emit("")
+        for key, value in OPARG_SIZES.items():
+            self.out.emit(f"#define {key} {value}")
         self.out.emit("")
+
         self.out.emit("#define OPCODE_METADATA_FMT(OP) "
                       "(_PyOpcode_opcode_metadata[(OP)].instr_format)")
         self.out.emit("#define SAME_OPCODE_METADATA(OP1, OP2) \\")
@@ -1263,6 +1277,9 @@ def write_metadata(self) -> None:
                             # Construct a dummy Component -- input/output mappings are not used
                             part = Component(instr, [], [], instr.active_caches)
                             self.write_macro_expansions(instr.name, [part])
+                        elif instr.kind == "inst" and variable_used(instr.inst, "oparg1"):
+                            assert variable_used(instr.inst, "oparg2"), "Half super-instr?"
+                            self.write_super_expansions(instr.name)
                     case parser.Macro():
                         mac = self.macro_instrs[thing.name]
                         self.write_macro_expansions(mac.name, mac.parts)
@@ -1342,7 +1359,7 @@ def write_macro_expansions(self, name: str, parts: MacroParts) -> None:
                 print(f"NOTE: Part {part.instr.name} of {name} is not a viable uop")
                 return
             if part.instr.instr_flags.HAS_ARG_FLAG or not part.active_caches:
-                size, offset = 0, 0
+                size, offset = OPARG_SIZES["OPARG_FULL"], 0
             else:
                 # If this assert triggers, is_viable_uops() lied
                 assert len(part.active_caches) == 1, (name, part.instr.name)
@@ -1350,10 +1367,50 @@ def write_macro_expansions(self, name: str, parts: MacroParts) -> None:
                 size, offset = cache.effect.size, cache.offset
             expansions.append((part.instr.name, size, offset))
         assert len(expansions) > 0, f"Macro {name} has empty expansion?!"
+        self.write_expansions(name, expansions)
+
+    def write_super_expansions(self, name: str) -> None:
+        """Write special macro expansions for super-instructions.
+
+        If you get an assertion failure here, you probably have accidentally
+        violated one of these assumptions:
+
+        - A super-instruction's name is of the form FIRST_SECOND where
+          FIRST and SECOND are regular instructions whose name has the
+          form FOO_BAR. Thus, there must be exactly 3 underscores.
+          Example: LOAD_CONST_STORE_FAST.
+
+        - A super-instruction's body uses `oparg1` and `oparg2`, and no
+          other instruction's body uses those variable names.
+
+        - A super-instruction has no active (used) cache entries.
+
+        In the expansion, the first instruction's operand is all but the
+        bottom 4 bits of the super-instruction's oparg, and the second
+        instruction's operand is the bottom 4 bits. We use the special
+        size codes OPARG_TOP and OPARG_BOTTOM for these.
+        """
+        pieces = name.split("_")
+        assert len(pieces) == 4, f"{name} doesn't look like a super-instr"
+        name1 = "_".join(pieces[:2])
+        name2 = "_".join(pieces[2:])
+        assert name1 in self.instrs, f"{name1} doesn't match any instr"
+        assert name2 in self.instrs, f"{name2} doesn't match any instr"
+        instr1 = self.instrs[name1]
+        instr2 = self.instrs[name2]
+        assert not instr1.active_caches, f"{name1} has active caches"
+        assert not instr2.active_caches, f"{name2} has active caches"
+        expansions = [
+            (name1, OPARG_SIZES["OPARG_TOP"], 0),
+            (name2, OPARG_SIZES["OPARG_BOTTOM"], 0),
+        ]
+        self.write_expansions(name, expansions)
+
+    def write_expansions(self, name: str, expansions: list[tuple[str, int, int]]) -> None:
         pieces = [f"{{ {name}, {size}, {offset} }}" for name, size, offset in expansions]
         self.out.emit(
             f"[{name}] = "
-            f"{{ .nuops = {len(expansions)}, .uops = {{ {', '.join(pieces)} }} }},"
+            f"{{ .nuops = {len(pieces)}, .uops = {{ {', '.join(pieces)} }} }},"
         )
 
     def emit_metadata_entry(
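
For reference, a minimal Python sketch (not part of the patch) of how the new OPARG_TOP and
OPARG_BOTTOM size codes are meant to be read when a super-instruction's oparg is expanded;
the helper name decode_super_oparg below is illustrative only:

    def decode_super_oparg(oparg: int) -> tuple[int, int]:
        # OPARG_TOP selects all but the bottom 4 bits; OPARG_BOTTOM selects the bottom 4 bits.
        return oparg >> 4, oparg & 0xF

    # Example: LOAD_FAST_LOAD_FAST with oparg 0x23 expands to LOAD_FAST 2, then LOAD_FAST 3.
    assert decode_super_oparg(0x23) == (2, 3)

This mirrors both the new OPARG_TOP/OPARG_BOTTOM cases in translate_bytecode_to_trace() and
the expansions emitted by write_super_expansions().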