armv8 二进制位表示(C4 章节):https://developer.arm.com/documentation/ddi0487/latest/

tools/objtool/check.h 中有这样一个 instruction 结构体表示一个指令:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
struct instruction {
struct list_head list;
struct hlist_node hash;
struct section *sec;
unsigned long offset;
unsigned int len;
enum insn_type type;
unsigned long immediate;
bool alt_group, dead_end, ignore, hint, save, restore, ignore_alts;
bool retpoline_safe;
u8 visited;
struct symbol *call_dest;
struct instruction *jump_dest;
struct instruction *first_jump_src;
struct rela *jump_table;
struct list_head alts;
struct symbol *func;
struct stack_op stack_op;
struct insn_state state;
struct orc_entry orc;
};

Data Processing – Immediate

处理立即数。

对应代码:

1
2
3
4
5
6
7
8
9
10
int arm_decode_dp_imm(u32 instr, enum insn_type *type,
unsigned long *immediate, struct list_head *ops_list)
{
arm_decode_class decode_fun;

decode_fun = aarch64_insn_dp_imm_decode_table[INSN_DP_IMM_SUBCLASS(instr)];
if (!decode_fun)
return -1;
return decode_fun(instr, type, immediate, ops_list);
}

也就是含有dp_imm的函数都属于Data Processing -- Immediate 操作。

它的布局如下:

1
2
3
4
5
6
7
8
 31 30 29+28   |   26|25    |23|22         |                                                                            0|
| | | | | | |
+--------------+------------+--------------+-----------------------------------------------------------------------------+
| | | | |
| | | op0 | |
| | 100 | | |
| | | | |
+--------+-----------+---------+-----------------------------------------------------------------------------------------+

当 28-26 位为 100 时,armv8 的指令格式如上。其中,op0 的数字的不同有不同的作用 (C4.1.86):

Decode fields op0 Decode group or instruction
00x PC-rel. addressing
010 Add/subtract(immediate)
011 Add/subtract(immediate, with tags)
100 Logical(immediate)
101 Move wide(immediate)
110 Bitfield
111 Extract

详细情况查阅文档 (c4.1.86)

1
(((opcode) >> 23) & (NR_DP_IMM_SUBCLASS - 1))

INSN_DP_IMM_SUBCLASS 的操作就是获取 op0 的值。

tools/objtool/arch/arm64/decode.c 中有这样一个数组:

1
2
3
4
5
6
7
8
9
10
11

// 这个数组与上方表格相对
static arm_decode_class aarch64_insn_dp_imm_decode_table[NR_DP_IMM_SUBCLASS] = {
[0 ... INSN_PCREL] = arm_decode_pcrel,
[INSN_ADD_SUB] = arm_decode_add_sub,
[INSN_ADD_TAG] = arm_decode_add_sub_tags,
[INSN_LOGICAL] = arm_decode_logical,
[INSN_MOVE_WIDE] = arm_decode_move_wide,
[INSN_BITFIELD] = arm_decode_bitfield,
[INSN_EXTRACT] = arm_decode_extract,
};

这个数组用于将不同编码类别的 ARM64 指令与对应的解码函数关联。op0 的值不同有不同的解析函数。

tools/objtool/arch/arm64/include/bit_operations.h中有这样一段宏定义,定义一些常见的位操作:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// 生成 N 位全为 1 的值
// 零扩展:零扩展指的是将目标的高位数设置为零,而不是将高位数设置成原数字的最高有效位。
// 零扩展通常用于将无符号数字移动至较大的字段中,同时保留其数值;
// 而符号扩展通常用于有符号的数字。
// 提取 X 中的第 N 位位数
// 符号扩展

// '~' 为取反
// 0UL 表示把数字 0 转换为无符号长整型数
// ~0UL 表示生成一个具有所有位都为 1 的掩码
// nbits 表示 x 的最高位位置
static inline unsigned long sign_extend(unsigned long x, int nbits)
{
return ((~0UL + (EXTRACT_BIT(x, nbits - 1) ^ 1)) << nbits) | x;
}

处理 PC-rel. addressing 的情况:

布局:

1
2
3
4
5
6
7
             +            +                                 +             +
31 30 29 28| 27 26 25 24| 23 5 4 | 0|
+---+-----+---------------------------------------------+---+-------------+
| | | | | |
| op|immlo|1 0 0 0 0 | immhi | Rd |
| | | | | |
+---+-----+---------------+-----------------------------+-----------------+

具有以下设定:

op instruction
0 ADR
1 ADRP
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
int arm_decode_pcrel(u32 instr, enum insn_type *type,
unsigned long *immediate, struct list_head *ops_list)
{
unsigned char page = 0;
u32 immhi = 0, immlo = 0;

// 首位为 0 为 ADR 操作
// 1 则为 ADRP
page = EXTRACT_BIT(instr, 31);
// immediate 的高位
immhi = (instr >> 5) & ONES(19);
// immediate 的低位
immlo = (instr >> 29) & ONES(2);

// 符号扩展不理解为什么需要左移 2 | immlo
// 左移 2 对齐?
*immediate = SIGN_EXTEND((immhi << 2) | immlo, 21);

if (page)
*immediate = SIGN_EXTEND(*immediate << 12, 33);

*type = INSN_OTHER;

return 0;
}

关于 ADR 与 ADRP:

  • ADR 指令用于计算相对于程序计数器(PC)的地址偏移量,并将结果加载到目标寄存器中。ADR ro, jump_target
  • ADRP 则是用于计算相对于页表的地址偏移量,并将结果加载到目标寄存器。ADRP x0, page_table_entry. ADRP 在计算出的地址偏移量的高 12 位上会填充一些数据(待查找,似乎是相应的页表索引)

处理 Move wide (immediate) 的情况

布局:

1
2
3
4
5
6
7
             +            +            +                    +             +
31 30 29 28| 27 26 25 24| 23 22 21 20| 5 4 | 0|
+---+-----+-------------------+-----+-------------------+---+-------------+
| | | | | | |
| op| opc |1 0 0 1 0 1 | hw | imm16 | Rd |
| | | | | | |
+---+-----+-------------------+-----+-------------------+-----------------+

具有以下设定:

sf(op) opc hw instruction
- 01 - Unallocated.
0 - 1x Unallocated
0 00 0x MOVN-32-bit variant
0 10 0x MOVZ-32-bit variant
0 11 0x MOVK-32-bit variant
1 00 - MOVN-64-bit variant
1 10 - MOVZ-64-bit variant
1 11 - MOVK-64-bit variant
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
int arm_decode_move_wide(u32 instr, enum insn_type *type,
unsigned long *immediate, struct list_head *ops_list)
{
u32 imm16 = 0;
// sf 就是首位标志位 op
unsigned char hw = 0, opc = 0, sf = 0;

sf = EXTRACT_BIT(instr, 31);
opc = (instr >> 29) & ONES(2);
hw = (instr >> 21) & ONES(2);
imm16 = (instr >> 5) & ONES(16);

// 这里 (hw & 0x2) 是将 32bit 的操作剔除?
if ((sf == 0 && (hw & 0x2)) || opc == 0x1)
return arm_decode_unknown(instr, type, immediate, ops_list);

*type = INSN_OTHER;
*immediate = imm16;

return 0;
}

处理 Bitfield 的情况

布局:

1
2
3
4
5
6
 31   30 29 28  | 27 26 25 24 | 23 22 21  |       16| 15      |  10 9  |   5 4 |   0|
+---+------+----+-------------+---+--+----+-------------------+----+---+----+--+----+
| | | | | | | | |
|sf | opc | 1 0 0 1 1 0 | N| immr | imms | Rn | Rd |
| | | | | | | | |
+---+------+----------------------+--+--------------+--------------+--------+-------+

具有以下设定:

sf opc N instruction
- 11 - Unallocated.
0 - 1 Unallocated
0 00 0 SBFM-32-bit variant
0 01 0 BFM-32-bit varian
0 10 0 UBFM-32-bit varian
1 - 0 Unallocated.
1 00 1 SBFM-64-bit variant
1 01 1 BFM-64-bit varian
1 10 1 UBFM-64-bit varian
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
int arm_decode_bitfield(u32 instr, enum insn_type *type,
unsigned long *immediate, struct list_head *ops_list)
{
unsigned char sf = 0, opc = 0, N = 0;

sf = EXTRACT_BIT(instr, 31);
opc = (instr >> 29) & ONES(2);
N = EXTRACT_BIT(instr, 22);

// 剔除 Unallocated 的指令
if (opc == 0x3 || sf != N)
return arm_decode_unknown(instr, type, immediate, ops_list);

*type = INSN_OTHER;

return 0;
}

关于BFM指令表示 Bit Field Move,位域移动指令。前面有 S 则为 signed 表示有符号,U 表示无符号。用于从一个操作数中提取一个位域,并将其移动到目标寄存器中。
SBFM <Wd>, <Wn>, #<immr>, #<imms>
其中:

  • Rd 是目标寄存器,用于存储提取的位域值。
  • Rn 是源寄存器,包含要提取位域的源操作数。
  • #immr 是右移位数的立即数。它定义了从源操作数中提取位域时的右移数。
  • #imms 是位域长度的立即数。它定义了要提取的位域的长度。

处理 extract 的情况

布局

1
2
3
4
5
6
 31   30 29 28  | 27 26 25 24 | 23 22 21 20 |     16| 15      |  10 9  |   5 4 |   0|
+---+------+----+-------------+---+--+--+---+-----------------+----+---+----+--+----+
| | | | | | | | | |
|sf | op21 | 1 0 0 1 1 1 | N|o0| Rm | imms | Rn | Rd |
| | | | | | | | | |
+---+------+----------------------+--+--+-----------+--------------+--------+-------+

具有以下设定:

sf op21 N o0 imms instruction
- x1 - - - Unallocated.
- 00 - 1 - Unallocated.
- 1x - - - Unallocated.
0 - - - 1xxxxx Unallocated.
0 - 1 - - Unallocated.
0 00 0 0 0xxxxx EXTR-32-bit variant
1 - 0 - - Unallocated.
1 00 1 0 - EXTR-64-bit variant
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
int arm_decode_extract(u32 instr, enum insn_type *type,
unsigned long *immediate, struct list_head *ops_list)
{
unsigned char sf = 0, op21 = 0, N = 0, o0 = 0;
unsigned char imms = 0;
unsigned char decode_field = 0;

sf = EXTRACT_BIT(instr, 31);
op21 = (instr >> 29) & ONES(2);
N = EXTRACT_BIT(instr, 22);
o0 = EXTRACT_BIT(instr, 21);
imms = (instr >> 10) & ONES(6);

// decode_field 的作用刚好可以排除 Unallocated 的情况
decode_field = (sf << 4) | (op21 << 2) | (N << 1) | o0;
*type = INSN_OTHER;
*immediate = imms;

// 10010 是 EXTR-64-bit,0 则是 EXTR-32-bit
if ((decode_field == 0 && !EXTRACT_BIT(imms, 5)) ||
decode_field == 0b10010)
return 0;

return arm_decode_unknown(instr, type, immediate, ops_list);
}

类似的,以下操作都可以根据 armv8 手册 查找到,这部分内容是处理立即数:C4.1.86

Add/subtract (immediate)

在函数 arm_decode_add_subarm_decode_add_sub 实现。

关于它的布局查看手册。其中 rn 表示源寄存器,rd 表示目标寄存器。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

int arm_decode_add_sub(u32 instr, enum insn_type *type,
unsigned long *immediate, struct list_head *ops_list)
{
unsigned long imm12 = 0, imm = 0;
unsigned char sf = 0, sh = 0, S = 0, op_bit = 0;
unsigned char rn = 0, rd = 0;

S = EXTRACT_BIT(instr, 29);
op_bit = EXTRACT_BIT(instr, 30);
sf = EXTRACT_BIT(instr, 31);
sh = EXTRACT_BIT(instr, 22);
rd = instr & ONES(5);
rn = (instr >> 5) & ONES(5);
imm12 = (instr >> 10) & ONES(12);
// 妙,实现了 32 和 64 位的切换
imm = ZERO_EXTEND(imm12 << (sh * 12), (sf + 1) * 32);

*type = INSN_OTHER;

// 理解不能,为什么需要判断这个指令类型?
if (rd == CFI_BP || (!S && rd == CFI_SP) || stack_related_reg(rn)) {
struct stack_op *op;

*type = INSN_STACK;

op = calloc(1, sizeof(*op));
list_add_tail(&op->list, ops_list);

op->dest.type = OP_DEST_REG;
op->dest.offset = 0;
op->dest.reg = rd;
op->src.type = OP_SRC_ADD;
// 实现 sub/add
op->src.offset = op_bit ? -1 * imm : imm;
op->src.reg = rn;
}
return 0;
}

Add/subtract (immediate, with tags)

在函数 arm_decode_add_sub_tags 实现。

Logical (immediate)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104

// 获取最高位索引
int highest_set_bit(u32 x)
{
int i;

for (i = 31; i >= 0; i--, x <<= 1)
// 0x80000000 32 位二进制形式最高位为 1
// 10000000000000000000000000000000
if (x & 0x80000000)
return i;
return 0;
}

/* imms and immr are both 6 bit long */
__uint128_t decode_bit_masks(unsigned char N, unsigned char imms,
unsigned char immr, bool immediate)
{
u64 tmask, wmask;
u32 diff, S, R, esize, welem, telem;
unsigned char levels = 0, len = 0;

// 计算 imms 中第一位为 1 的索引?
len = highest_set_bit((N << 6) | ((~imms) & ONES(6)));
levels = ZERO_EXTEND(ONES(len), 6);

if (immediate && ((imms & levels) == levels)) {
WARN("unknown instruction");
return -1;
}

S = imms & levels;
R = immr & levels;
diff = ZERO_EXTEND(S - R, 6);

esize = 1 << len;
diff = diff & ONES(len);

welem = ZERO_EXTEND(ONES(S + 1), esize);
telem = ZERO_EXTEND(ONES(diff + 1), esize);

wmask = replicate(ror(welem, esize, R), esize, 64 / esize);
tmask = replicate(telem, esize, 64 / esize);

return ((__uint128_t)wmask << 64) | tmask;
}


int arm_decode_logical(u32 instr, enum insn_type *type,
unsigned long *immediate, struct list_head *ops_list)
{
unsigned char sf = 0, opc = 0, N = 0;
unsigned char imms = 0, immr = 0, rn = 0, rd = 0;
struct stack_op *op;

rd = instr & ONES(5);
rn = (instr >> 5) & ONES(5);

imms = (instr >> 10) & ONES(6);
immr = (instr >> 16) & ONES(6);

N = EXTRACT_BIT(instr, 22);
opc = (instr >> 29) & ONES(2);
sf = EXTRACT_BIT(instr, 31);

if (N == 1 && sf == 0)
return arm_decode_unknown(instr, type, immediate, ops_list);

*type = INSN_OTHER;
*immediate = (decode_bit_masks(N, imms, immr, true) >> 64);

if (opc & 1)
return 0;

if (rd != CFI_SP)
return 0;

*type = INSN_STACK;

if (rn != CFI_SP) {
op = calloc(1, sizeof(*op));
list_add_tail(&op->list, ops_list);

op->dest.type = OP_DEST_REG;
op->dest.offset = 0;
op->dest.reg = rd;
op->src.type = OP_SRC_REG;
op->src.offset = 0;
op->src.reg = rn;
}

op = calloc(1, sizeof(*op));
list_add_tail(&op->list, ops_list);

op->dest.type = OP_DEST_REG;
op->dest.offset = 0;
op->dest.reg = rd;

op->src.type = OP_SRC_AND;
op->src.offset = 0;
op->src.reg = rd;

return 0;
}

有两个函数:decode_bit_masksarm_decode_logical,其中前者是在 bit_operations.c 文件中实现,后者调用了前者。

decode_bit_masks 返回一个 bitmask immediate。其中对于 32 位返回imms:immr,64 位返回N:imms:immr。可以查看 Arm 的实现 (https://developer.arm.com/documentation/ddi0596/2020-12/Shared-Pseudocode/AArch64-Instrs?lang=en#impl-aarch64.DecodeBitMasks.4).

在函数 arm_decode_logical.c 中实现。