iOS传参分析

iOS传参分析

通过此篇文章,您可以了解x86_64,arm32,arm64下各种函数调用的参数传递方式;可以了解为什么在arm64下通过可变形参函数hook OC方法取值会crash,以及对如何解决该问题的思考;基于参数传递原理,此demo对应的BNRHookAction类可实现无侵入式埋点。


备注:文中汇编代码都是在debug模式下取得;文中的"可变形参方法"和"不定形参方法"是相同的概念;x86_64中的mov指令和arm上的mov指令赋值方向是相反的,文中对部分汇编做了解释;在arm32位的机器上调试时,如果不先停止调试就直接command+R重新调试,会崩溃

iOS函数如何传递参数

测试源码

// Driver for the experiment: passes the same ten values through an
// Objective-C message send, a plain C function and a variadic C function,
// then re-points the method's implementation at each variadic function and
// sends the message again.
-(void)test{
       // Objective-C message send with ten long arguments.
       [self testParamsPast:1 :2 :3 :4 :5 :6 :7 :8 :9 :10];
    // Plain C call with the same ten values.
    testCFunParamsPast(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
    // Direct variadic call; the two pointer constants occupy the named
    // self/_cmd parameter slots and the ten integers go through "...".
    testVariadicFun((void *)0xAAAAAAAA, (void *)0x77777777,1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
    // Look up the ten-argument method and replace its implementation with
    // testVariadicFun, so the next message send lands in the variadic function.
    Method m = class_getInstanceMethod([self class], NSSelectorFromString(@"testParamsPast::::::::::"));
    method_setImplementation(m, (IMP)testVariadicFun);  
    [self testParamsPast:1 :2 :3 :4 :5 :6 :7 :8 :9 :10];
    // Repeat with testVariadicFun2, which names six more parameters before "...".
    method_setImplementation(m, (IMP)testVariadicFun2);
    [self testParamsPast:1 :2 :3 :4 :5 :6 :7 :8 :9 :10];
}

// Ten-argument Objective-C method used as a call target for the experiment.
// None of the parameters are read; the body only logs a local constant.
-(void)testParamsPast:(long)a :(long)b :(long)c :(long)d :(long)e :(long)f :(long)g :(long)h :(long)i :(long)j{
    long localA = 0xffffffff;
    // Boxes the local into an NSNumber and logs it.
    NSLog(@"%@",@(localA));
}

// Plain C counterpart of testParamsPast::::::::::, taking ten long arguments.
// None of the parameters are read; the body only logs a local constant.
void testCFunParamsPast(long a,long b,long c, long d, long e, long f,long g, long h, long i, long j){
    long localA = 0xffffffff;
    // Boxes the local into an NSNumber and logs it.
    NSLog(@"%@",@(localA));
}

// Variadic function whose two named parameters mirror the self/_cmd slots of
// an Objective-C method implementation. It reads nine long values from the
// variadic list; each read overwrites `param` and the values are not used
// further, so the function is only useful for inspection in a debugger.
void testVariadicFun(void * self, void * _cmd, ...){
    va_list args;
    // The variadic list starts right after the last named parameter, _cmd.
    va_start(args, _cmd);
    long param = va_arg(args, long);
    param = va_arg(args, long);
    param = va_arg(args, long);
    param = va_arg(args, long);
    param = va_arg(args, long);
    param = va_arg(args, long);
    param = va_arg(args, long);
    param = va_arg(args, long);
    param = va_arg(args, long);
    va_end(args);
}

// Variant of testVariadicFun with eight named parameters (self, _cmd and
// parm1..parm6) ahead of the variadic list. It reads four long values from
// the variadic list; each read overwrites `param` and the values are not
// used further.
void testVariadicFun2(void * self, void * _cmd,void *parm1,void *parm2,void *parm3,void *parm4,void *parm5,void *parm6, ...){
    va_list args;
    // The variadic list starts right after the last named parameter, parm6.
    va_start(args, parm6);
    long param = va_arg(args, long);
    param = va_arg(args, long);
    param = va_arg(args, long);
    param = va_arg(args, long);
    va_end(args);
}

x86_64函数

x86_64架构传递参数时,前6个参数分别存放在`rdi`、`rsi`、`rdx`、`rcx`、`r8`、`r9`中,剩下的参数放在栈中。


test函数汇编代码明细part_1

BNRHookAction`-[BNRTestParamsPast test]:
0x106b685a0 <+0>:   pushq  %rbp
0x106b685a1 <+1>:   movq   %rsp, %rbp
0x106b685a4 <+4>:   pushq  %r15
0x106b685a6 <+6>:   pushq  %r14
0x106b685a8 <+8>:   pushq  %r13
0x106b685aa <+10>:  pushq  %r12
0x106b685ac <+12>:  pushq  %rbx
0x106b685ad <+13>:  subq   $0x108, %rsp              ; imm = 0x108 
0x106b685b4 <+20>:  movl   $0x1, %eax
0x106b685b9 <+25>:  movl   %eax, %edx //Mark: 0x1->eax->edx,edx是rdx中的低32bit,下面解释类似
0x106b685bb <+27>:  movl   $0x2, %eax
0x106b685c0 <+32>:  movl   %eax, %ecx
0x106b685c2 <+34>:  movl   $0x3, %eax
0x106b685c7 <+39>:  movl   %eax, %r8d
0x106b685ca <+42>:  movl   $0x4, %eax
0x106b685cf <+47>:  movl   %eax, %r9d
0x106b685d2 <+50>:  movl   $0x5, %eax
0x106b685d7 <+55>:  movl   %eax, %r10d
0x106b685da <+58>:  movl   $0x6, %eax
0x106b685df <+63>:  movl   %eax, %r11d
0x106b685e2 <+66>:  movl   $0x7, %eax
0x106b685e7 <+71>:  movl   %eax, %ebx
0x106b685e9 <+73>:  movl   $0x8, %eax
0x106b685ee <+78>:  movl   %eax, %r14d
0x106b685f1 <+81>:  movl   $0x9, %eax
0x106b685f6 <+86>:  movl   %eax, %r15d
0x106b685f9 <+89>:  movl   $0xa, %eax
0x106b685fe <+94>:  movl   %eax, %r12d
0x106b68601 <+97>:  movq   %rdi, -0x30(%rbp)
0x106b68605 <+101>: movq   %rsi, -0x38(%rbp)
0x106b68609 <+105>: movq   -0x30(%rbp), %rsi
0x106b6860d <+109>: movq   0x5381c(%rip), %rdi       ; "testParamsPast::::::::::"
0x106b68614 <+116>: movq   %rdi, -0x48(%rbp)
0x106b68618 <+120>: movq   %rsi, %rdi
0x106b6861b <+123>: movq   -0x48(%rbp), %rsi
0x106b6861f <+127>: movq   $0x5, (%rsp) 	//mark:立即数5存入rsp所指向的内存地址中
0x106b68627 <+135>: movq   $0x6, 0x8(%rsp)
0x106b68630 <+144>: movq   $0x7, 0x10(%rsp)
0x106b68639 <+153>: movq   $0x8, 0x18(%rsp)
0x106b68642 <+162>: movq   $0x9, 0x20(%rsp)
0x106b6864b <+171>: movq   $0xa, 0x28(%rsp)
0x106b68654 <+180>: movq   %r12, -0x50(%rbp)
0x106b68658 <+184>: movq   %r15, -0x58(%rbp)
0x106b6865c <+188>: movq   %r14, -0x60(%rbp)
0x106b68660 <+192>: movq   %rbx, -0x68(%rbp)
0x106b68664 <+196>: movq   %r11, -0x70(%rbp)
0x106b68668 <+200>: movq   %r10, -0x78(%rbp) 
//mark:到这一步,已经把self,_cmd,1,2,3,4这6个参数放在了
//`rdi``rsi``rdx``rcx``r8``r9`,并且把5,6,7,8,9,10这几个参数存入了栈中;
//可在lldb中打印rdi和rsi,打印rsi时,需要强转成SEL类型
//这其中还有一些操作,将5,6,7,8,9,10也存入了寄存器中,但是在进入testParamsPast::::::::::后,这些寄存器被用作其他用途了;
->  0x106b6866c <+204>: callq  0x106b9d634               ; symbol stub for: objc_msgSend

testParamsPast::::::::::函数汇编代码,

 BNRHookAction`-[BNRTestParamsPast testParamsPast::::::::::]:
0x105eb3ef0 <+0>:   pushq  %rbp       //mark:保存调用者函数栈的基址指针
0x105eb3ef1 <+1>:   movq   %rsp, %rbp //mark:重新设置本函数栈的基址指针
0x105eb3ef4 <+4>:   pushq  %r15
0x105eb3ef6 <+6>:   pushq  %r14
0x105eb3ef8 <+8>:   pushq  %r13
0x105eb3efa <+10>:  pushq  %r12
0x105eb3efc <+12>:  pushq  %rbx
0x105eb3efd <+13>:  subq   $0x78, %rsp
0x105eb3f01 <+17>:  movq   0x38(%rbp), %rax
0x105eb3f05 <+21>:  movq   0x30(%rbp), %r10
0x105eb3f09 <+25>:  movq   0x28(%rbp), %r11
0x105eb3f0d <+29>:  movq   0x20(%rbp), %rbx
0x105eb3f11 <+33>:  movq   0x18(%rbp), %r14
0x105eb3f15 <+37>:  movq   0x10(%rbp), %r15
0x105eb3f19 <+41>:  movl   $0xffffffff, %r12d        ; imm = 0xFFFFFFFF 
0x105eb3f1f <+47>:  movl   %r12d, %r13d
0x105eb3f22 <+50>:  movq   %rdi, -0x30(%rbp)
0x105eb3f26 <+54>:  movq   %rsi, -0x38(%rbp)
0x105eb3f2a <+58>:  movq   %rdx, -0x40(%rbp)
0x105eb3f2e <+62>:  movq   %rcx, -0x48(%rbp)
0x105eb3f32 <+66>:  movq   %r8, -0x50(%rbp)
0x105eb3f36 <+70>:  movq   %r9, -0x58(%rbp)
//mark:以上代码是把rdi rsi rdx rcx r8 r9存入栈中,也就是
//参数列表中的前6个参数存入栈中
...

test函数汇编代码明细part_2,接前文中的明细part_1部分

这里的汇编代码是调用testCFunParamsPast,它是一个C函数;
之前的代码,和调用testParamsPast::::::::::之前做的工作是一样的,

0x106b68671 <+209>: movl   $0x1, %eax
0x106b68676 <+214>: movl   %eax, %edi
0x106b68678 <+216>: movl   $0x2, %eax
0x106b6867d <+221>: movl   %eax, %esi
0x106b6867f <+223>: movl   $0x3, %eax
0x106b68684 <+228>: movl   %eax, %edx
0x106b68686 <+230>: movl   $0x4, %eax
0x106b6868b <+235>: movl   %eax, %ecx
0x106b6868d <+237>: movl   $0x5, %eax
0x106b68692 <+242>: movl   %eax, %r8d
0x106b68695 <+245>: movl   $0x6, %eax
0x106b6869a <+250>: movl   %eax, %r9d
0x106b6869d <+253>: movl   $0x7, %eax
0x106b686a2 <+258>: movl   %eax, %r10d
0x106b686a5 <+261>: movl   $0x8, %eax
0x106b686aa <+266>: movl   %eax, %r11d
0x106b686ad <+269>: movl   $0x9, %eax
0x106b686b2 <+274>: movl   %eax, %ebx
0x106b686b4 <+276>: movl   $0xa, %eax
0x106b686b9 <+281>: movl   %eax, %r14d
0x106b686bc <+284>: movq   $0x7, (%rsp)
0x106b686c4 <+292>: movq   $0x8, 0x8(%rsp)
0x106b686cd <+301>: movq   $0x9, 0x10(%rsp)
0x106b686d6 <+310>: movq   $0xa, 0x18(%rsp)
0x106b686df <+319>: movq   %r14, -0x80(%rbp)
0x106b686e3 <+323>: movq   %rbx, -0x88(%rbp)
0x106b686ea <+330>: movq   %r11, -0x90(%rbp)
0x106b686f1 <+337>: movq   %r10, -0x98(%rbp)
0x106b686f8 <+344>: callq  0x106b688e0               ; testCFunParamsPast at BNRTestParamsPast.m:26

testCFunParamsPast函数的汇编和testParamsPast::::::::::类似,汇编就不贴上来了

test函数汇编代码明细part_3,接前文中的明细part_2部分
,这里的汇编代码是调用testVariadicFun

0x106b686fd <+349>: movl   $0xaaaaaaaa, %eax         ; imm = 0xAAAAAAAA 
0x106b68702 <+354>: movl   %eax, %edi
0x106b68704 <+356>: movl   $0x77777777, %eax         ; imm = 0x77777777 
0x106b68709 <+361>: movl   %eax, %esi
0x106b6870b <+363>: movl   $0x1, %edx
0x106b68710 <+368>: movl   $0x2, %ecx
0x106b68715 <+373>: movl   $0x3, %r8d
0x106b6871b <+379>: movl   $0x4, %r9d
0x106b68721 <+385>: movl   $0x5, %eax
0x106b68726 <+390>: movl   $0x6, %r13d
0x106b6872c <+396>: movl   $0x7, %r10d
0x106b68732 <+402>: movl   $0x8, %r11d
0x106b68738 <+408>: movl   $0x9, %r14d
0x106b6873e <+414>: movl   $0xa, %r15d
0x106b68744 <+420>: movl   $0x5, (%rsp)
0x106b6874b <+427>: movl   $0x6, 0x8(%rsp) //mark:此时rsp的值`0x7ffeebd2e350`,这里是将第6个之后的参数(值为5~10)存到栈中
0x106b68753 <+435>: movl   $0x7, 0x10(%rsp)
0x106b6875b <+443>: movl   $0x8, 0x18(%rsp)
0x106b68763 <+451>: movl   $0x9, 0x20(%rsp)
0x106b6876b <+459>: movl   $0xa, 0x28(%rsp)
0x106b68773 <+467>: movl   %eax, -0x9c(%rbp)
0x106b68779 <+473>: movb   $0x0, %al
0x106b6877b <+475>: movl   %r15d, -0xa0(%rbp)
0x106b68782 <+482>: movl   %r14d, -0xa4(%rbp)
0x106b68789 <+489>: movl   %r11d, -0xa8(%rbp)
0x106b68790 <+496>: movl   %r10d, -0xac(%rbp)
0x106b68797 <+503>: movl   %r13d, -0xb0(%rbp)
0x106b6879e <+510>: callq  0x106b68990               ; testVariadicFun at BNRTestParamsPast.m:31

x86_64位系统上,va_list的定义如下

struct __va_list_tag {
    unsigned int gp_offset;
    unsigned int fp_offset;
    void *overflow_arg_area;
    void *reg_save_area;
} [1]
    

在void testVariadicFun(void * self, void * _cmd, ...)中的va_start(args, _cmd)下一句话打断点,可以看到va_list变量的值;直接查看reg_save_area所指向的内存,在偏移16个字节(由gp_offset所指示)的地址处依次存放了第3个参数到第6个参数。它的取值规则是:当gp_offset小于48时,参数的值是*(reg_save_area + gp_offset),然后gp_offset += 8;否则参数值等于*overflow_arg_area,然后移动指针overflow_arg_area += sizeof(long)

test函数代码part_4:将testParamsPast::::::::::的实现指向testVariadicFun;然后调用testParamsPast::::::::::,即执行testVariadicFun的实现,各参数取值与直接调用testVariadicFun一样。

通过以上分析,在x86_64上,前6个参数是通过寄存器来传值的;不管是固定参数个数的函数还是不定形参的函数,在函数里面都会把那6个寄存器的值拷贝到栈里面(Debug模式下)

armV7(arm32)上参数传递

直接分析调用testVariadicFun调用前的部分

  0x6def8 <+162>: mvn    r0, #0x55555555
0x6defc <+166>: movw   r1, #0x7777
0x6df00 <+170>: movt   r1, #0x7777
0x6df04 <+174>: movs   r2, #0x1
0x6df06 <+176>: movs   r3, #0x2
0x6df08 <+178>: movw   r9, #0x3
0x6df0c <+182>: movw   r12, #0x4
0x6df10 <+186>: movw   lr, #0x5
0x6df14 <+190>: movs   r4, #0x6
0x6df16 <+192>: movs   r5, #0x7
0x6df18 <+194>: movs   r6, #0x8
0x6df1a <+196>: movw   r8, #0x9
0x6df1e <+200>: movw   r10, #0xa
0x6df22 <+204>: str.w  r9, [sp]
0x6df26 <+208>: str.w  r12, [sp, #0x4]
0x6df2a <+212>: str.w  lr, [sp, #0x8]
0x6df2e <+216>: str    r4, [sp, #0xc]
0x6df30 <+218>: str    r5, [sp, #0x10]
0x6df32 <+220>: str    r6, [sp, #0x14]
0x6df34 <+222>: str.w  r8, [sp, #0x18]
0x6df38 <+226>: str.w  r10, [sp, #0x1c]
->  0x6df3c <+230>: bl     0x6e098                   ; testVariadicFun at BNRTestParamsPast.m:31

将前四个参数存放在r0-r3中,后面的8个参数存放在栈中;在testVariadicFun函数中,又分别将r0-r3入栈,和在调用函数之前入栈的参数组成连续的地址(按照资料说,参数入栈顺序从右至左,第一个参数最后入栈;但当不定形参函数第一、二个参数有名字时,可以看到内存地址中_cmd的值反而比self后入栈);在arm架构上,va_list的定义就是一个指针,va_arg(args, long)的作用是取值并移动args的指向。

arm64参数传递

wn寄存器是xn寄存器的低32bit;在arm64下,函数的前8个参数分别由x0~x7来传递,超过8个的参数压栈传递

直接调用testVariadicFun前,执行的汇编

 0x10006edc4 <+148>: mov    x8, sp
0x10006edc8 <+152>: mov    w13, #0xa
0x10006edcc <+156>: mov    x9, x13
0x10006edd0 <+160>: str    x9, [x8, #0x48] //mark:152~160,*(x8+0x48) = x9 = w13 = 0x0a,表示参数表中最后一个参数入栈,以下类似
0x10006edd4 <+164>: mov    w13, #0x9
0x10006edd8 <+168>: mov    x9, x13
0x10006eddc <+172>: str    x9, [x8, #0x40]
0x10006ede0 <+176>: orr    w13, wzr, #0x8
0x10006ede4 <+180>: mov    x9, x13
0x10006ede8 <+184>: str    x9, [x8, #0x38]
0x10006edec <+188>: orr    w13, wzr, #0x7
0x10006edf0 <+192>: mov    x9, x13
0x10006edf4 <+196>: str    x9, [x8, #0x30]
0x10006edf8 <+200>: orr    w13, wzr, #0x6
0x10006edfc <+204>: mov    x9, x13
0x10006ee00 <+208>: str    x9, [x8, #0x28]
0x10006ee04 <+212>: mov    w13, #0x5
0x10006ee08 <+216>: mov    x9, x13
0x10006ee0c <+220>: str    x9, [x8, #0x20]
0x10006ee10 <+224>: orr    w13, wzr, #0x4
0x10006ee14 <+228>: mov    x9, x13
0x10006ee18 <+232>: str    x9, [x8, #0x18]
0x10006ee1c <+236>: orr    w13, wzr, #0x3
0x10006ee20 <+240>: mov    x9, x13
0x10006ee24 <+244>: str    x9, [x8, #0x10]
0x10006ee28 <+248>: orr    w13, wzr, #0x2
0x10006ee2c <+252>: mov    x9, x13
0x10006ee30 <+256>: str    x9, [x8, #0x8]
0x10006ee34 <+260>: orr    w13, wzr, #0x1
0x10006ee38 <+264>: mov    x9, x13
0x10006ee3c <+268>: str    x9, [x8]
0x10006ee40 <+272>: mov    w13, #-0x55555556
0x10006ee44 <+276>: mov    x0, x13
0x10006ee48 <+280>: mov    w13, #0x77777777
0x10006ee4c <+284>: mov    x1, x13
0x10006ee50 <+288>: bl     0x10006efb4               ; testVariadicFun at BNRTestParamsPast.m:31

可以看到参数10~1都是依次入栈的,并没有通过寄存器来传递,而第一个参数和第二个参数则是通过x0、x1来传递的

testVariadicFun的部分汇编代码

0x1000e6fb4 <+0>:   sub    sp, sp, #0x70             ; =0x70 
0x1000e6fb8 <+4>:   str    x0, [sp, #0x68] //mark:这里将x0入栈,这个地址刚好挨着第三个参数(参数值为1)入栈的地址 
0x1000e6fbc <+8>:   str    x1, [sp, #0x60]
0x1000e6fc0 <+12>:  add    x0, sp, #0x70             ; =0x70 
0x1000e6fc4 <+16>:  str    x0, [sp, #0x58]

总结:直接调用不定形参函数,函数的参数的地址也是连续分布的,且没名字的参数是直接通过入栈来传递的,可以通过va_list移动指针来获得各个参数的值

test函数代码将testParamsPast::::::::::的实现指向testVariadicFun;然后调用testParamsPast::::::::::,即执行testVariadicFun的实现;下面是执行testParamsPast::::::::::前的汇编代码

 0x1000e6eac <+380>: orr    x2, xzr, #0x1
0x1000e6eb0 <+384>: orr    x3, xzr, #0x2
0x1000e6eb4 <+388>: orr    x4, xzr, #0x3
0x1000e6eb8 <+392>: orr    x5, xzr, #0x4
0x1000e6ebc <+396>: mov    x6, #0x5
0x1000e6ec0 <+400>: orr    x7, xzr, #0x6
0x1000e6ec4 <+404>: orr    x8, xzr, #0x7
0x1000e6ec8 <+408>: orr    x9, xzr, #0x8
0x1000e6ecc <+412>: mov    x10, #0x9
0x1000e6ed0 <+416>: mov    x11, #0xa
0x1000e6ed4 <+420>: adrp   x12, 83
0x1000e6ed8 <+424>: add    x12, x12, #0xed0          ; =0xed0 
0x1000e6edc <+428>: ldur   x1, [x29, #-0x8]
0x1000e6ee0 <+432>: ldr    x12, [x12]
0x1000e6ee4 <+436>: stur   x0, [x29, #-0x30]
0x1000e6ee8 <+440>: mov    x0, x1  //mark:x0存self的值
0x1000e6eec <+444>: mov    x1, x12  //mark:x1存_cmd的值 
0x1000e6ef0 <+448>: str    x8, [sp] //mark:最后的4个参数入栈
0x1000e6ef4 <+452>: str    x9, [sp, #0x8]
0x1000e6ef8 <+456>: str    x10, [sp, #0x10]
0x1000e6efc <+460>: str    x11, [sp, #0x18]
->  0x1000e6f00 <+464>: bl     0x10011b420               ; symbol stub for: objc_msgSend

可以看到,x0~x7分别存了前8个参数的值(包括objc_msgSend默认的self、_cmd参数),后面4个参数通过入栈传递

再来看testVariadicFun的汇编代码,和直接调用它时的汇编代码是一样的;通过va_start(args, _cmd)获取第一个可变形参(第三个参数)的地址,地址是获取到了,但是x2~x7的值并没有入栈,所以在arm64上用可变形参函数hook OC函数的实现,再用va_list来取参数的值就会出错

如何解决arm64用可变形参函数hook OC函数的实现时,取参数值不正确的问题

将可变形参函数的具名参数增加至8个,来解决arm64用可变形参函数hook OC函数实现的问题

通过汇编代码分析,只要传递的参数是有名字的,那么在函数实现的开始部分,就会把对应的传递参数的寄存器的值入栈(参数1~参数8分别由高至低存放,和只用栈来传递参数时的地址顺序相反),这个入栈的地址对应的就是参数中变量的地址;入栈后,便可通过va_start(args, parm6)获取第一个可变形参(第9个参数)的地址了,并能通过地址正确取到参数的值了;可用如下可变形参函数hook arm64的方法

void testVariadicFun2(void * self, void * _cmd,void *parm1,void *parm2,void *parm3,void *parm4,void *parm5,void *parm6, ...) 

直接取通过寄存器传值的参数

事实上,第一种方法由于parm1~parm6的类型是void *,它只会取得x2~x7寄存器的值,而当参数中有float、double类型时,parm1~parm6就拿不到正确的值了;而且当使用va_list来从栈中取值时,由于hook的是OC函数,在调用函数时,编译器并不认为你调用的是可变形参函数,所以不会进行8字节对齐,只能取得类型占用8个字节(long、指针(OC对象)、double)的参数的值;使用OC和汇编混编的方式来取得寄存器的值,需要在进入函数时就立刻开始取值,否则传递参数的寄存器的值会因为函数中的代码而改变,取值代码如下:

// Snapshot of the arm64 argument registers into C locals. This has to run at
// the very start of the hooked function, before any other code gets a chance
// to overwrite the registers.
float f0,f1,f2,f3,f4,f5,f6,f7;      // s0-s7, read as single precision
double d0,d1,d2,d3,d4,d5,d6,d7;     // d0-d7, read as double precision
long x0,x1,x2,x3,x4,x5,x6,x7;       // x0/x1 are declared but not captured here
asm(
    // Operands %0-%7 receive s0-s7 (32-bit view), %8-%15 receive d0-d7.
    "fmov   %w0,s0\n"
    "fmov   %8,d0\n"
    "fmov   %w1,s1\n"
    "fmov   %9,d1\n"
    "fmov   %w2,s2\n"
    "fmov   %10,d2\n"
    "fmov   %w3,s3\n"
    "fmov   %11,d3\n"
    "fmov   %w4,s4\n"
    "fmov   %12,d4\n"
    "fmov   %w5,s5\n"
    "fmov   %13,d5\n"
    "fmov   %w6,s6\n"
    "fmov   %14,d6\n"
    "fmov   %w7,s7\n"
    "fmov   %15,d7\n"
    :"=r"(f0),"=r"(f1),"=r"(f2),"=r"(f3),"=r"(f4),"=r"(f5),"=r"(f6),"=r"(f7),"=r"(d0),"=r"(d1),"=r"(d2),"=r"(d3),"=r"(d4),"=r"(d5),"=r"(d6),"=r"(d7)
    :
    // Naming x2-x7 as clobbers keeps the compiler from picking them for the
    // output operands, so their incoming values survive until they are read.
    :"x2","x3","x4","x5","x6","x7"
    );
asm(
    "mov   %0,x2\n"
    "mov   %1,x3\n"
    "mov   %2,x4\n"
    "mov   %3,x5\n"
    "mov   %4,x6\n"
    "mov   %5,x7\n"
    :"=r"(x2),"=r"(x3),"=r"(x4),"=r"(x5),"=r"(x6),"=r"(x7)
    :
    // Same reservation as in the first asm: without it an output operand could
    // be assigned to one of x2-x7 and overwrite it before it has been read.
    :"x2","x3","x4","x5","x6","x7"
    );   
    
    

链接

源码链接