// Path: blob/master/Utilities/cmliblzma/liblzma/simple/arm64.c
// (code-viewer residue: 3156 views)
// SPDX-License-Identifier: 0BSD12///////////////////////////////////////////////////////////////////////////////3//4/// \file arm64.c5/// \brief Filter for ARM64 binaries6///7/// This converts ARM64 relative addresses in the BL and ADRP immediates8/// to absolute values to increase redundancy of ARM64 code.9///10/// Converting B or ADR instructions was also tested but it's not useful.11/// A majority of the jumps for the B instruction are very small (+/- 0xFF).12/// These are typical for loops and if-statements. Encoding them to their13/// absolute address reduces redundancy since many of the small relative14/// jump values are repeated, but very few of the absolute addresses are.15//16// Authors: Lasse Collin17// Jia Tan18// Igor Pavlov19//20///////////////////////////////////////////////////////////////////////////////2122#include "simple_private.h"232425static size_t26arm64_code(void *simple lzma_attribute((__unused__)),27uint32_t now_pos, bool is_encoder,28uint8_t *buffer, size_t size)29{30size_t i;3132// Clang 14.0.6 on x86-64 makes this four times bigger and 40 % slower33// with auto-vectorization that is enabled by default with -O2.34// Such vectorization bloat happens with -O2 when targeting ARM64 too35// but performance hasn't been tested.36#ifdef __clang__37# pragma clang loop vectorize(disable)38#endif39for (i = 0; i + 4 <= size; i += 4) {40uint32_t pc = (uint32_t)(now_pos + i);41uint32_t instr = read32le(buffer + i);4243if ((instr >> 26) == 0x25) {44// BL instruction:45// The full 26-bit immediate is converted.46// The range is +/-128 MiB.47//48// Using the full range helps quite a lot with49// big executables. Smaller range would reduce false50// positives in non-code sections of the input though51// so this is a compromise that slightly favors big52// files. 
With the full range, only six bits of the 3253// need to match to trigger a conversion.54const uint32_t src = instr;55instr = 0x94000000;5657pc >>= 2;58if (!is_encoder)59pc = 0U - pc;6061instr |= (src + pc) & 0x03FFFFFF;62write32le(buffer + i, instr);6364} else if ((instr & 0x9F000000) == 0x90000000) {65// ADRP instruction:66// Only values in the range +/-512 MiB are converted.67//68// Using less than the full +/-4 GiB range reduces69// false positives on non-code sections of the input70// while being excellent for executables up to 512 MiB.71// The positive effect of ADRP conversion is smaller72// than that of BL but it also doesn't hurt so much in73// non-code sections of input because, with +/-512 MiB74// range, nine bits of 32 need to match to trigger a75// conversion (two 10-bit match choices = 9 bits).76const uint32_t src = ((instr >> 29) & 3)77| ((instr >> 3) & 0x001FFFFC);7879// With the addition only one branch is needed to80// check the +/- range. This is usually false when81// processing ARM64 code so branch prediction will82// handle it well in terms of performance.83//84//if ((src & 0x001E0000) != 085// && (src & 0x001E0000) != 0x001E0000)86if ((src + 0x00020000) & 0x001C0000)87continue;8889instr &= 0x9000001F;9091pc >>= 12;92if (!is_encoder)93pc = 0U - pc;9495const uint32_t dest = src + pc;96instr |= (dest & 3) << 29;97instr |= (dest & 0x0003FFFC) << 3;98instr |= (0U - (dest & 0x00020000)) & 0x00E00000;99write32le(buffer + i, instr);100}101}102103return i;104}105106107static lzma_ret108arm64_coder_init(lzma_next_coder *next, const lzma_allocator *allocator,109const lzma_filter_info *filters, bool is_encoder)110{111return lzma_simple_coder_init(next, allocator, filters,112&arm64_code, 0, 4, 4, is_encoder);113}114115116#ifdef HAVE_ENCODER_ARM64117extern lzma_ret118lzma_simple_arm64_encoder_init(lzma_next_coder *next,119const lzma_allocator *allocator,120const lzma_filter_info *filters)121{122return arm64_coder_init(next, allocator, filters, 
true);123}124#endif125126127#ifdef HAVE_DECODER_ARM64128extern lzma_ret129lzma_simple_arm64_decoder_init(lzma_next_coder *next,130const lzma_allocator *allocator,131const lzma_filter_info *filters)132{133return arm64_coder_init(next, allocator, filters, false);134}135#endif136137138