#include "ResourceCompression.hh"

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include <format>
#include <memory>
#include <phosg/Encoding.hh>
#include <phosg/Filesystem.hh>
#include <phosg/Strings.hh>
#include <phosg/Time.hh>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

#include "Emulators/M68KEmulator.hh"
#include "Emulators/PPC32Emulator.hh"
#include "ResourceDecompressors/System.hh"
#include "SystemDecompressors.hh"

using namespace std;
using namespace phosg;

namespace ResourceDASM {

using Resource = ResourceFile::Resource;

// Describes one way of decompressing a resource: either a native function
// (decompress != nullptr) or a blob of dcmp/ncmp code to run in an emulator
// (decompress == nullptr; data/size/is_ppc are meaningful).
struct DecompressorImplementation {
  // This field is used for internal decompressors
  typedef string (*decompress_fn)(
      const CompressedResourceHeader& header,
      const void* source,
      size_t size);
  decompress_fn decompress;

  // These fields are used for external (emulated) decompressors.
  // data is a non-owning pointer.
  // NOTE(review): for file-provided decompressors this assumes the context
  // ResourceFile keeps the resource's data alive for as long as this object is
  // used - confirm against ResourceFile::get_resource.
  const void* data;
  size_t size;
  bool is_ppc;

  DecompressorImplementation(decompress_fn fn)
      : decompress(fn),
        data(nullptr),
        size(0),
        is_ppc(false) {}
  DecompressorImplementation(const void* data, size_t size, bool is_ppc)
      : decompress(nullptr),
        data(data),
        size(size),
        is_ppc(is_ppc) {}
};

// Builds the list of decompressor implementations to try for the given
// dcmp/ncmp ID, in priority order. Sources whose SKIP_* bit is set in
// decompress_flags are omitted. context_rf may be null, in which case no
// file-provided decompressors are considered. The returned list may be empty.
static vector<DecompressorImplementation> get_candidate_decompressors(
    const ResourceFile* context_rf, int16_t dcmp_id, uint64_t decompress_flags) {
  // In order of priority, we try:
  // 1. dcmp resource from the context ResourceFile
  // 2. ncmp resource from the context ResourceFile
  // 3. native implementation from src/Decompressors/System*.cc
  // 4. system dcmp from SystemDecompressors.cc
  // 5. system ncmp from SystemDecompressors.cc
  vector<DecompressorImplementation> ret;

  // First, add the file's dcmp/ncmp if present
  if (context_rf) {
    for (uint8_t is_ppc = 0; is_ppc < 2; is_ppc++) {
      uint64_t skip_flag = is_ppc ? DecompressionFlag::SKIP_FILE_NCMP : DecompressionFlag::SKIP_FILE_DCMP;
      if (decompress_flags & skip_flag) {
        continue;
      }
      try {
        uint32_t dcmp_type = is_ppc ? RESOURCE_TYPE_ncmp : RESOURCE_TYPE_dcmp;
        auto res = context_rf->get_resource(dcmp_type, dcmp_id);
        // The architecture flag must follow the resource type: an ncmp is a
        // PEF and has to go through the PEF loader, not be treated as raw 68K
        // code. (This matches what the system dcmp/ncmp loop below does.)
        ret.emplace_back(res->data.data(), res->data.size(), is_ppc);
      } catch (const out_of_range&) {
        // Presumably the file has no such resource; just move on to the next
        // candidate source.
      }
    }
  }

  // Second, add resource_dasm's native implementation
  if (!(decompress_flags & DecompressionFlag::SKIP_NATIVE)) {
    if (dcmp_id == 0) {
      ret.emplace_back(decompress_system0);
    } else if (dcmp_id == 1) {
      ret.emplace_back(decompress_system1);
    } else if (dcmp_id == 2) {
      ret.emplace_back(decompress_system2);
    } else if (dcmp_id == 3) {
      ret.emplace_back(decompress_system3);
    }
  }

  // Finally, add the system dcmp/ncmp
  for (uint8_t is_ppc = 0; is_ppc < 2; is_ppc++) {
    uint64_t skip_flag = is_ppc ? DecompressionFlag::SKIP_SYSTEM_NCMP : DecompressionFlag::SKIP_SYSTEM_DCMP;
    if (decompress_flags & skip_flag) {
      continue;
    }
    try {
      auto sys_dcmp = get_system_decompressor(is_ppc, dcmp_id);
      ret.emplace_back(sys_dcmp.first, sys_dcmp.second, is_ppc);
    } catch (const out_of_range&) {
    }
  }

  return ret;
}

struct M68KDecompressorInputHeader {
  // This structure specifies what the dcmp code expects to see on its stack at
  // call time.

  // This is used to tell the program where to return to (stack pointer points
  // here at entry time). On actual hardware, this would be generated by a call
  // opcode; in our case, emulation begins within the dcmp resource already, so
  // we simulate a call by adding this field before the actual parameters.
  be_uint32_t return_addr;

  // Parameters to the decompressor - the m68k calling convention passes args on
  // the stack, so these are the actual args to the function. The v8 or v9
  // format is chosen based on the version field in the compressed resource's
  // header.
  union {
    struct { // used when header_version == 8
      be_uint32_t data_size;
      be_uint32_t working_buffer_addr;
      be_uint32_t dest_buffer_addr;
      be_uint32_t source_buffer_addr;
    } __attribute__((packed)) v8;
    struct { // used when header_version == 9
      be_uint32_t source_resource_header;
      be_uint32_t dest_buffer_addr;
      be_uint32_t source_buffer_addr;
      be_uint32_t data_size;
    } __attribute__((packed)) v9;
  } __attribute__((packed)) args;

  // This isn't part of the input parameters; this is just a convenient place
  // to put a reset opcode (which stops emulation cleanly) that the program can
  // "return" to.
  be_uint16_t reset_opcode;
  be_uint16_t unused;
} __attribute__((packed));

struct PPC32DecompressorInputHeader {
  // As above, this is what an ncmp function expects on its stack at call time.
  // Unlike in 68K-land, the PPC calling convention passes the first several
  // arguments in registers, so the data pointers, sizes, etc. aren't in this
  // structure at all.
  be_uint32_t saved_r1;
  be_uint32_t saved_cr;
  be_uint32_t saved_lr;
  be_uint32_t reserved1;
  be_uint32_t reserved2;
  be_uint32_t saved_r2;
  be_uint32_t unused[2];

  // This is where the program returns to; we set r2 to -1 (which should never
  // happen normally) and make the syscall handler stop emulation
  be_uint32_t set_r2_opcode;
  be_uint32_t syscall_opcode;
} __attribute__((packed));

// Decompresses a resource whose data begins with a CompressedResourceHeader.
//
// The candidate decompressors from get_candidate_decompressors are tried in
// order; the first one that finishes without throwing produces the result.
// Native candidates are called directly (and their output size is checked
// against the header); dcmp/ncmp candidates are run in the 68K or PPC emulator
// with the stack, output, working and input regions set up below.
//
// Arguments:
//   res: the compressed resource. It is only read, never modified.
//   decompress_flags: DecompressionFlag bits (SKIP_*, VERBOSE, TRACE_EXECUTION,
//     DEBUG_EXECUTION, STRICT_MEMORY).
//   context_rf: file to search for dcmp/ncmp resources; may be null.
// Returns a new Resource with the same type, ID and name as res. If the
// header's magic value is missing, the data is returned unchanged with
// FLAG_COMPRESSED cleared; otherwise the data is the decompressed output,
// FLAG_COMPRESSED is cleared and FLAG_DECOMPRESSED is set.
// Throws runtime_error if the data is shorter than the header, the header's
// compression attribute bit is clear, the header version isn't 8 or 9, no
// candidate decompressor is available, or every candidate fails.
shared_ptr<Resource> decompress_resource(
    shared_ptr<const Resource> res,
    uint64_t decompress_flags,
    const ResourceFile* context_rf) {
  if (res->data.size() < sizeof(CompressedResourceHeader)) {
    throw runtime_error("resource marked as compressed but is too small");
  }

  auto result = make_shared<Resource>();
  result->type = res->type;
  result->id = res->id;
  result->flags = res->flags;
  result->name = res->name;

  const auto& header = *reinterpret_cast<const CompressedResourceHeader*>(
      res->data.data());
  if (header.magic != 0xA89F6572) {
    // It looks like some resources have the compression bit set but aren't
    // actually compressed. Reverse-engineering ResEdit makes it look like the
    // Resource Manager just treats the resource as uncompressed if this value
    // is missing, so let's also not fail in that case.
    result->flags &= ~ResourceFlag::FLAG_COMPRESSED;
    result->data = res->data;
    return result;
  }

  if (!(header.attributes & 0x01)) {
    throw runtime_error("resource marked as compressed but does not have compression attribute set");
  }

  // Each debugging level implies the ones below it: debug => trace => verbose
  bool debug_execution = !!(decompress_flags & DecompressionFlag::DEBUG_EXECUTION);
  bool trace_execution = debug_execution || !!(decompress_flags & DecompressionFlag::TRACE_EXECUTION);
  bool verbose = trace_execution || !!(decompress_flags & DecompressionFlag::VERBOSE);

  int16_t dcmp_resource_id;
  uint16_t output_extra_bytes;
  if (header.header_version == 9) {
    dcmp_resource_id = header.version.v9.dcmp_resource_id;
    output_extra_bytes = header.version.v9.output_extra_bytes;
  } else if (header.header_version == 8) {
    dcmp_resource_id = header.version.v8.dcmp_resource_id;
    output_extra_bytes = header.version.v8.output_extra_bytes;
  } else {
    throw runtime_error("compressed resource header version is not 8 or 9");
  }

  auto decompressors = get_candidate_decompressors(
      context_rf, dcmp_resource_id, decompress_flags);
  if (decompressors.empty()) {
    throw runtime_error("no decompressors are available for this resource");
  }

  if (verbose) {
    fwrite_fmt(stderr, "using dcmp/ncmp {} ({} implementation(s) available)\n",
        dcmp_resource_id, decompressors.size());
    fwrite_fmt(stderr, "note: data size is {} (0x{:X}); decompressed data size is {} (0x{:X}) bytes\n",
        res->data.size(), res->data.size(),
        header.decompressed_size, header.decompressed_size);
  }

  for (size_t z = 0; z < decompressors.size(); z++) {
    const auto& decompressor = decompressors[z];
    if (verbose) {
      fwrite_fmt(stderr, "attempting decompression with implementation {} of {}\n",
          z + 1, decompressors.size());
    }

    // Any exception inside this try block means this candidate failed; the
    // handler at the bottom of the loop logs it (if verbose) and we move on to
    // the next candidate.
    try {
      if (decompressor.decompress != nullptr) {
        // This is an internal decompressor: just call the decompress function.
        uint64_t start_time = now();
        string decompressed_data = decompressor.decompress(
            header,
            res->data.data() + sizeof(CompressedResourceHeader),
            res->data.size() - sizeof(CompressedResourceHeader));
        if (decompressed_data.size() != header.decompressed_size) {
          throw runtime_error(std::format(
              "internal decompressor produced the wrong amount of data ({} bytes expected, {} bytes received)",
              header.decompressed_size, decompressed_data.size()));
        }
        if (verbose) {
          float duration = static_cast<float>(now() - start_time) / 1000000.0f;
          fwrite_fmt(stderr, "note: decompressed resource using internal decompressor in {:g} seconds ({} -> {} bytes)\n",
              duration, res->data.size(), decompressed_data.size());
        }
        result->data = std::move(decompressed_data);

      } else {
        // This is an emulated decompressor. We'll set up memory appropriately,
        // then use either M68KEmulator or PPC32Emulator to run the code
        // contained in the dcmp or ncmp resource.
        auto mem = make_shared<MemoryContext>();
        if (decompress_flags & DecompressionFlag::STRICT_MEMORY) {
          mem->set_strict(true);
        }

        uint32_t entry_pc = 0;
        uint32_t entry_r2 = 0;
        bool use_ppc_emulator;
        if (!decompressor.is_ppc) {
          use_ppc_emulator = false;

          // Figure out where in the dcmp to start execution. There appear to be
          // two formats: one that has 'dcmp' in bytes 4-8 where execution
          // appears to just start at byte 0 (usually it's a branch opcode), and
          // one where the first three words appear to be offsets to various
          // functions, followed by code. The second word appears to be the main
          // entry point in this format, so we use that to determine where to
          // start execution.
          // TODO: It looks like the decompression implementation in ResEdit
          // assumes the second format (with the three offsets) if and only if
          // the compressed resource has header format 9. This feels kind of bad
          // because... shouldn't the dcmp format be a property of the dcmp
          // resource, not the resource being decompressed? We use a heuristic
          // here instead, which seems correct for all decompressors I've seen.
          uint32_t entry_offset;
          if (decompressor.size < 10) {
            throw runtime_error("decompressor resource is too short");
          }
          uint32_t internal_signature = *reinterpret_cast<const be_uint32_t*>(
              reinterpret_cast<const uint8_t*>(decompressor.data) + 4);
          if (internal_signature == RESOURCE_TYPE_dcmp) {
            entry_offset = 0;
          } else {
            // TODO: Call init and exit for decompressors that have them. It's
            // not clear (yet) what the arguments to init and exit should be...
            // they each apparently take one argument based on how they adjust
            // the stack before returning, but every decompressor I've seen
            // ignores the argument's value.
            entry_offset = *reinterpret_cast<const be_uint16_t*>(
                reinterpret_cast<const uint8_t*>(decompressor.data) + 2);
          }

          // Load the dcmp into emulated memory. dcmp resources are just raw
          // 68K code; there's no header beyond what's described above.
          size_t code_region_size = decompressor.size;
          uint32_t code_addr = 0xF0000000;
          mem->allocate_at(code_addr, code_region_size);
          mem->memcpy(code_addr, decompressor.data, decompressor.size);

          entry_pc = code_addr + entry_offset;
          if (verbose) {
            fwrite_fmt(stderr, "loaded code at {:08X}:{:X}\n", code_addr, code_region_size);
            fwrite_fmt(stderr, "dcmp entry offset is {:08X} (loaded at {:X})\n",
                entry_offset, entry_pc);
          }

        } else { // decompressor.is_ppc == true
          // ncmp resources are entire PEF files, so we have to parse the
          // header and run relocations (if any) while loading them.
          PEFFile f("", decompressor.data, decompressor.size);
          f.load_into("", mem, 0xF0000000);
          use_ppc_emulator = f.is_ppc();

          // ncmp decompressors don't appear to define any of the standard
          // export symbols (init/main/term); instead, they define a single
          // export symbol in the export table.
          // TODO: It's possible that ncmps are allowed to define init and
          // term. Presumably this would be similar to how the unused functions
          // work in dcmp v9 above... reverse-engineer ResEdit some more and
          // figure this out.
          if (!f.init().name.empty()) {
            throw runtime_error("ncmp decompressor has init symbol");
          }
          if (!f.main().name.empty()) {
            throw runtime_error("ncmp decompressor has main symbol");
          }
          if (!f.term().name.empty()) {
            throw runtime_error("ncmp decompressor has term symbol");
          }
          const auto& exports = f.exports();
          if (exports.size() != 1) {
            throw runtime_error("ncmp decompressor does not export exactly one symbol");
          }

          // The start symbol is actually a transition vector, which is the code
          // address followed by the desired value in r2.
          string start_symbol_name = ":" + exports.begin()->second.name;
          uint32_t start_symbol_addr = mem->get_symbol_addr(start_symbol_name);
          entry_pc = mem->read_u32b(start_symbol_addr);
          entry_r2 = mem->read_u32b(start_symbol_addr + 4);

          if (verbose) {
            fwrite_fmt(stderr, "ncmp entry pc is {:08X} with r2 = {:08X}\n",
                entry_pc, entry_r2);
          }
        }

        size_t stack_region_size = 1024 * 16; // 16KB should be enough
        size_t output_region_size = header.decompressed_size + output_extra_bytes;
        // TODO: Looks like some decompressors expect zero bytes after the
        // compressed input? Find out if this is true and fix it if not.
        size_t input_region_size = res->data.size() + 0x100;
        // TODO: This is probably way too big; probably we should use
        // ((data.size() * 256) / working_buffer_fractional_size) instead here?
        size_t working_buffer_region_size = res->data.size() * 256;

        // Set up data memory regions. Slightly awkward assumption: decompressed
        // data is never more than 256 times the size of the input data.
        // We intentionally put the regions pretty far from each other in the
        // address space in order to fail catastrophically in case of buffer
        // underflows or overflows; this is useful for debugging the emulators.
        uint32_t stack_addr = 0x10000000;
        mem->allocate_at(stack_addr, stack_region_size);
        if (!stack_addr) {
          throw runtime_error("cannot allocate stack region");
        }
        uint32_t output_addr = 0x20000000;
        mem->allocate_at(output_addr, output_region_size);
        if (!output_addr) {
          throw runtime_error("cannot allocate output region");
        }
        uint32_t working_buffer_addr = 0x80000000;
        mem->allocate_at(working_buffer_addr, working_buffer_region_size);
        if (!working_buffer_addr) {
          throw runtime_error("cannot allocate working buffer region");
        }
        uint32_t input_addr = 0xC0000000;
        mem->allocate_at(input_addr, input_region_size);
        if (!input_addr) {
          throw runtime_error("cannot allocate input region");
        }
        if (verbose) {
          fwrite_fmt(stderr, "memory:\n");
          fwrite_fmt(stderr, " stack region at {:08X}:{:X}\n", stack_addr, stack_region_size);
          fwrite_fmt(stderr, " output region at {:08X}:{:X}\n", output_addr, output_region_size);
          fwrite_fmt(stderr, " working region at {:08X}:{:X}\n", working_buffer_addr, working_buffer_region_size);
          fwrite_fmt(stderr, " input region at {:08X}:{:X}\n", input_addr, input_region_size);
        }
        // The whole resource (header included) is copied in, so the compressed
        // payload starts at input_addr + sizeof(CompressedResourceHeader).
        mem->memcpy(input_addr, res->data.data(), res->data.size());

        uint64_t execution_start_time;
        if (use_ppc_emulator) {
          // Set up header in stack region
          uint32_t return_addr = stack_addr + stack_region_size - sizeof(PPC32DecompressorInputHeader) + offsetof(PPC32DecompressorInputHeader, set_r2_opcode);
          auto* input_header = mem->at<PPC32DecompressorInputHeader>(
              stack_addr + stack_region_size - sizeof(PPC32DecompressorInputHeader));
          input_header->saved_r1 = 0xAAAAAAAA;
          input_header->saved_cr = 0x00000000;
          input_header->saved_lr = return_addr;
          input_header->reserved1 = 0x00000000;
          input_header->reserved2 = 0x00000000;
          input_header->saved_r2 = entry_r2;
          input_header->unused[0] = 0x00000000;
          input_header->unused[1] = 0x00000000;
          input_header->set_r2_opcode = 0x3840FFFF; // li r2, -1
          input_header->syscall_opcode = 0x44000002; // sc

          // Create emulator
          auto interrupt_manager = make_shared<InterruptManager>();
          PPC32Emulator emu(mem);
          emu.set_interrupt_manager(interrupt_manager);

          // Set up registers. r3-r6 are the function arguments, which are
          // analogous to the arguments to dcmp resources.
          auto& regs = emu.registers();
          regs.r[1].u = stack_addr + stack_region_size - sizeof(PPC32DecompressorInputHeader);
          regs.r[2].u = entry_r2;
          regs.r[3].u = input_addr + sizeof(CompressedResourceHeader);
          regs.r[4].u = output_addr;
          regs.r[5].u = (header.header_version == 9) ? input_addr : working_buffer_addr;
          regs.r[6].u = input_region_size - sizeof(CompressedResourceHeader);
          regs.lr = return_addr;
          regs.pc = entry_pc;
          if (verbose) {
            fwrite_fmt(stderr, "initial stack contents (input header data):\n");
            print_data(stderr, input_header, sizeof(*input_header), regs.r[1].u);
          }

          // Set up the debugger, if debugging is enabled
          shared_ptr<EmulatorDebugger<PPC32Emulator>> debugger;
          if (trace_execution || debug_execution) {
            debugger = make_shared<EmulatorDebugger<PPC32Emulator>>();
            debugger->bind(emu);
            debugger->state.mode = debug_execution ? DebuggerMode::STEP : DebuggerMode::TRACE;
          }

          // Set up environment
          emu.set_syscall_handler([&](PPC32Emulator& emu) -> void {
            auto& regs = emu.registers();
            // We don't support any syscalls in PPC mode - the only syscall that
            // should occur is the one at the end of emulation, when r2 == -1.
            if (regs.r[2].u != 0xFFFFFFFF) {
              throw runtime_error("unimplemented syscall");
            }
            throw PPC32Emulator::terminate_emulation();
          });

          // Run the decompressor
          execution_start_time = now();
          try {
            emu.execute();
          } catch (const exception& e) {
            if (verbose) {
              uint64_t diff = now() - execution_start_time;
              float duration = static_cast<float>(diff) / 1000000.0f;
              fwrite_fmt(stderr, "powerpc decompressor execution failed ({:g}sec): {}\n", duration, e.what());
            }
            throw;
          }

        } else { // Not a PPC decompressor (it's 68K instead)
          // Set up header + args in the stack region
          auto* input_header = mem->at<M68KDecompressorInputHeader>(
              stack_addr + stack_region_size - sizeof(M68KDecompressorInputHeader));
          input_header->return_addr = stack_addr + stack_region_size - sizeof(M68KDecompressorInputHeader) + offsetof(M68KDecompressorInputHeader, reset_opcode);
          if (header.header_version == 9) {
            input_header->args.v9.data_size = input_region_size - sizeof(CompressedResourceHeader);
            input_header->args.v9.source_resource_header = input_addr;
            input_header->args.v9.dest_buffer_addr = output_addr;
            input_header->args.v9.source_buffer_addr = input_addr + sizeof(CompressedResourceHeader);
          } else {
            input_header->args.v8.data_size = input_region_size - sizeof(CompressedResourceHeader);
            input_header->args.v8.working_buffer_addr = working_buffer_addr;
            input_header->args.v8.dest_buffer_addr = output_addr;
            input_header->args.v8.source_buffer_addr = input_addr + sizeof(CompressedResourceHeader);
          }

          input_header->reset_opcode = 0x4E70;
          input_header->unused = 0x0000;

          // Set up registers
          M68KEmulator emu(mem);
          auto& regs = emu.registers();
          regs.a[7] = stack_addr + stack_region_size - sizeof(M68KDecompressorInputHeader);
          regs.pc = entry_pc;
          if (verbose) {
            fwrite_fmt(stderr, "initial stack contents (input header data):\n");
            print_data(stderr, input_header, sizeof(*input_header), regs.a[7]);
          }

          // Set up debugger
          shared_ptr<EmulatorDebugger<M68KEmulator>> debugger;
          if (trace_execution || debug_execution) {
            debugger = make_shared<EmulatorDebugger<M68KEmulator>>();
            debugger->bind(emu);
            debugger->state.mode = debug_execution ? DebuggerMode::STEP : DebuggerMode::TRACE;
          }

          // Set up environment. Unlike in PPC-land, we implement a few basic
          // system calls here, because there are some dcmps that actually use
          // them.
          unordered_map<uint16_t, uint32_t> trap_to_call_stub_addr;
          emu.set_syscall_handler([&](M68KEmulator& emu, uint16_t opcode) -> void {
            auto& regs = emu.registers();
            uint16_t trap_number;
            bool auto_pop = false;
            uint8_t flags = 0;

            if (opcode & 0x0800) {
              trap_number = opcode & 0x0BFF;
              auto_pop = opcode & 0x0400;
            } else {
              trap_number = opcode & 0x00FF;
              flags = (opcode >> 9) & 3;
            }

            // We only support a few traps here. Specifically:
            // - System dcmp 2 uses BlockMove (which is essentially memcpy)
            // - Ben Mickaelian's self-modifying decompressor uses
            //   GetTrapAddress, but it suffices to simulate the asked-for traps
            //   with stubs since the dcmp doesn't appear to use the return
            //   value for anything important
            if (trap_number == 0x002E) { // BlockMove
              // A0 = src, A1 = dst, D0 = size
              mem->memcpy(regs.a[1], regs.a[0], regs.d[0].u);
              regs.d[0].u = 0; // Result code (0 = success)

            } else if (trap_number == 0x0046) { // GetTrapAddress
              // The trap being asked about is in the low word of D0; this is
              // distinct from trap_number, which is the trap that got us here.
              uint16_t requested_trap = regs.d[0].u & 0xFFFF;
              if ((requested_trap > 0x4F) && (requested_trap != 0x54) && (requested_trap != 0x57)) {
                requested_trap |= 0x0800;
              }

              // If it already has a call routine, just return that
              try {
                regs.a[0] = trap_to_call_stub_addr.at(requested_trap);
                if (verbose) {
                  fwrite_fmt(stderr, "GetTrapAddress: using cached call stub for trap {:04X} -> {:08X}\n",
                      requested_trap, regs.a[0]);
                }

              } catch (const out_of_range&) {
                // Create a call stub
                uint32_t call_stub_addr = mem->allocate(4);
                be_uint16_t* call_stub = mem->at<be_uint16_t>(call_stub_addr, 4);
                trap_to_call_stub_addr.emplace(requested_trap, call_stub_addr);
                call_stub[0] = 0xA000 | requested_trap; // A-trap opcode
                call_stub[1] = 0x4E75; // rts

                // Return the address
                regs.a[0] = call_stub_addr;

                if (verbose) {
                  fwrite_fmt(stderr, "GetTrapAddress: created call stub for trap {:04X} -> {:08X}\n",
                      requested_trap, regs.a[0]);
                }
              }

            } else if (verbose) {
              if (trap_number & 0x0800) {
                fwrite_fmt(stderr, "warning: skipping unimplemented toolbox trap (num={:X}, auto_pop={})\n",
                    static_cast<uint16_t>(trap_number & 0x0BFF), auto_pop ? "true" : "false");
              } else {
                fwrite_fmt(stderr, "warning: skipping unimplemented os trap (num={:X}, flags={})\n",
                    static_cast<uint16_t>(trap_number & 0x00FF), flags);
              }
            }
          });

          // Run the decompressor
          execution_start_time = now();
          try {
            emu.execute();
          } catch (const exception& e) {
            if (verbose) {
              uint64_t diff = now() - execution_start_time;
              float duration = static_cast<float>(diff) / 1000000.0f;
              fwrite_fmt(stderr, "m68k decompressor execution failed ({:g}sec): {}\n", duration, e.what());
              emu.print_state(stderr);
            }
            throw;
          }
        }

        if (verbose) {
          uint64_t diff = now() - execution_start_time;
          float duration = static_cast<float>(diff) / 1000000.0f;
          fwrite_fmt(stderr, "note: decompressed resource in {:g} seconds ({} -> {} bytes)\n",
              duration, res->data.size(), header.decompressed_size);
        }

        result->data = mem->read(output_addr, header.decompressed_size);
      }

      // If we get here, the resource was decompressed and result->data holds
      // the decompressed data
      result->flags = (res->flags & ~ResourceFlag::FLAG_COMPRESSED) | ResourceFlag::FLAG_DECOMPRESSED;
      return result;

    } catch (const exception& e) {
      if (verbose) {
        fwrite_fmt(stderr, "decompressor implementation {} of {} failed: {}\n",
            z + 1, decompressors.size(), e.what());
      }
    }
  }

  throw runtime_error("no decompressor succeeded");
}

} // namespace ResourceDASM