Fuzz前置技能-unicorn模拟执行


Unicorn是一个轻量级, 多平台, 多架构的CPU模拟器框架,基于qemu开发,它可以代替CPU模拟代码的执行,常用于恶意代码分析,Fuzz等,该项目被用于Radare2逆向分析框架,GEF(gdb的pwn分析插件),Pwndbg,Angr符号执行框架等多个著名项目。

开发准备

官网

Unicorn

编译安装

安装提供了两种方式:

  • 从软件源安装

    • OSX(homebrew)

      brew install unicorn
      # After the Homebrew install, prepend unicorn's library directory to DYLD_LIBRARY_PATH
      export DYLD_LIBRARY_PATH=/usr/local/opt/unicorn/lib/:$DYLD_LIBRARY_PATH
      
      # Then install the Python bindings
      pip install unicorn
      
    • Linux参考官方安装手册

    • Windows参考官方安装手册

  • 从源码构建

    • 下载源码包:

      Download

      # Build from the unpacked source tree
      ./make.sh
      sudo ./make.sh install
      
      # Install the Python bindings
      pip install unicorn
      
      # Upgrade an already installed Python binding
      pip install unicorn --upgrade
      

    其他交叉编译(如iOS、ARM、Android)参考官方文档

快速入门

安装好unicorn后,可以用下面的测试用例来检测unicorn的功能是否可用

Python

  • 以下为Python调用unicorn框架测试代码

    from __future__ import print_function
    from unicorn import *
    from unicorn.x86_const import *
    
    # Machine code to emulate: INC ecx; DEC edx
    X86_CODE32 = b"\x41\x4a"
    
    # Address at which the code is mapped and where emulation begins
    ADDRESS = 0x1000000
    
    print("Emulate i386 code")
    try:
        # Create an emulator instance for x86 in 32-bit mode
        emu = Uc(UC_ARCH_X86, UC_MODE_32)
    
        # Map 2MB of memory at ADDRESS and copy the machine code into it
        mapped_size = 2 * 1024 * 1024
        emu.mem_map(ADDRESS, mapped_size)
        emu.mem_write(ADDRESS, X86_CODE32)
    
        # Seed ECX and EDX with known values so the effect of the code is visible
        emu.reg_write(UC_X86_REG_ECX, 0x1234)
        emu.reg_write(UC_X86_REG_EDX, 0x7890)
    
        # Run from ADDRESS up to the end of the code; no timeout or
        # instruction-count argument is passed to emu_start
        end_address = ADDRESS + len(X86_CODE32)
        emu.emu_start(ADDRESS, end_address)
    
        # Report the register values after emulation
        print("Emulation done. Below is the CPU context")
    
        ecx_after = emu.reg_read(UC_X86_REG_ECX)
        edx_after = emu.reg_read(UC_X86_REG_EDX)
        print(">>> ECX = 0x%x" %ecx_after)
        print(">>> EDX = 0x%x" %edx_after)
    
    except UcError as e:
        print("ERROR: %s" % e)
    

    最终输出结果:

    Emulate i386 code
    Emulation done. Below is the CPU context
    >>> ECX = 0x1235
    >>> EDX = 0x788f
    

    看到结果,ECX被加了1,并且EDX被减去1,表示python可以成功调用unicorn

C

  • 以下为C调用unicorn框架测试代码

    #include <inttypes.h>
    #include <stdio.h>
    #include <unicorn/unicorn.h>
    
    // Guest address where memory is mapped, the code is written and emulation starts
    #define ADDRESS 0x1000000
    #define X86_CODE32 "\x41\x4a\x66\x0f\xef\xc1" // INC ecx; DEC edx; PXOR xmm0, xmm1
    
    // Block hook (registered with UC_HOOK_BLOCK in test_i386): prints the
    // address and size of the basic block passed to the callback.
    static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data)
    {
        // Neither the engine handle nor the user data is needed here.
        (void)uc;
        (void)user_data;

        printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size);
    }
    
    // Instruction hook (registered with UC_HOOK_CODE in test_i386): prints the
    // address and size of the instruction passed to the callback, then the
    // value read from the EFLAGS register.
    static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data)
    {
        // Zero-initialized so a failed register read never prints an indeterminate value.
        int eflags = 0;

        (void)user_data;
        printf(">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size);

        // uc_reg_read() returns a non-zero uc_err on failure; report it instead
        // of printing whatever happened to be in eflags.
        if (uc_reg_read(uc, UC_X86_REG_EFLAGS, &eflags)) {
            printf(">>> --- Failed to read EFLAGS\n");
            return;
        }
        printf(">>> --- EFLAGS is 0x%x\n", eflags);

        // To stop the emulation early, call uc_emu_stop() here, for example:
        //     if (address == 0x1000009)
        //         uc_emu_stop(uc);
    }
    
    static void test_i386(void)
    {
        uc_engine *uc;
        uc_err err;
        uint32_t tmp;
        uc_hook trace1, trace2;
    
        int r_ecx = 0x1234;     // ECX 寄存器
        int r_edx = 0x7890;     // EDX 寄存器
        // XMM0 、 XMM1 寄存器, 数组分别为低64位和高64位
        uint64_t r_xmm0[2] = {0x08090a0b0c0d0e0f, 0x0001020304050607};
        uint64_t r_xmm1[2] = {0x8090a0b0c0d0e0f0, 0x0010203040506070};
    
        printf("Emulate i386 code\n");
    
        // 初始化x86环境
        err = uc_open(UC_ARCH_X86, UC_MODE_32, &uc);
        if (err) {
            printf("Failed on uc_open() with error returned: %u\n", err);
            return;
        }
    
        // 为模拟执行代码申请 2MB 内存
        uc_mem_map(uc, ADDRESS, 2 * 1024 * 1024, UC_PROT_ALL);
    
        // 向目标地址写入opcode
        if (uc_mem_write(uc, ADDRESS, X86_CODE32, sizeof(X86_CODE32) - 1)) {
            printf("Failed to write emulation code to memory, quit!\n");
            return;
        }
    
        // 初始化寄存器ECX、EDX、XMM0、XMM1
        uc_reg_write(uc, UC_X86_REG_ECX, &r_ecx);
        uc_reg_write(uc, UC_X86_REG_EDX, &r_edx);
        uc_reg_write(uc, UC_X86_REG_XMM0, &r_xmm0);
        uc_reg_write(uc, UC_X86_REG_XMM1, &r_xmm1);
    
        // 在函数内插桩,成功时会调用回调函数 
        uc_hook_add(uc, &trace1, UC_HOOK_BLOCK, hook_block, NULL, 1, 0);
    
        // 每当代码执行时调用回调函数
        uc_hook_add(uc, &trace2, UC_HOOK_CODE, hook_code, NULL, 1, 0);
    
        // 模拟执行
        err = uc_emu_start(uc, ADDRESS, ADDRESS + sizeof(X86_CODE32) - 1, 0, 0);
        if (err) {
            printf("Failed on uc_emu_start() with error returned %u: %s\n",
                    err, uc_strerror(err));
        }
    
        // 最后输出一些模拟执行完成后寄存器的值
        printf(">>> Emulation done. Below is the CPU context\n");
    
        uc_reg_read(uc, UC_X86_REG_ECX, &r_ecx);
        uc_reg_read(uc, UC_X86_REG_EDX, &r_edx);
        uc_reg_read(uc, UC_X86_REG_XMM0, &r_xmm0);
        printf(">>> ECX = 0x%x\n", r_ecx);
        printf(">>> EDX = 0x%x\n", r_edx);
        printf(">>> XMM0 = 0x%.16"PRIx64"%.16"PRIx64"\n", r_xmm0[1], r_xmm0[0]);
    
        // 读取内存中的内容
        if (!uc_mem_read(uc, ADDRESS, &tmp, sizeof(tmp)))
            printf(">>> Read 4 bytes from [0x%x] = 0x%x\n", ADDRESS, tmp);
        else
            printf(">>> Failed to read 4 bytes from [0x%x]\n", ADDRESS);
    
        // 最后需要关闭,否则会导致内存泄露
        uc_close(uc);
    }
    
    // Program entry point: runs the i386 emulation demo and returns 0.
    int main(void)
    {
        test_i386();
        return 0;
    }
    

    编辑Makefile进行编译:

    # Linker flags: glib-2.0 (queried through pkg-config), pthread, libm and unicorn
    LDFLAGS += $(shell pkg-config --libs glib-2.0) -lpthread -lm -lunicorn
    
    # Default target: build the test2 executable
    all: test2
    # Pattern rule: build an executable from the .c file with the same base name
    %: %.c
    	$(CC) $(CFLAGS) $^ $(LDFLAGS) -o $@
    

    上面的Makefile等同于命令:

    cc  test2.c -L/usr/local/Cellar/glib/2.70.1/lib -L/usr/local/opt/gettext/lib -lglib-2.0 -lintl -lpthread -lm -lunicorn -o test2
    

    运行结果如下:

    Emulate i386 code
    >>> Tracing basic block at 0x1000000, block size = 0x6
    >>> Tracing instruction at 0x1000000, instruction size = 0x1
    >>> --- EFLAGS is 0x0
    >>> Tracing instruction at 0x1000001, instruction size = 0x1
    >>> --- EFLAGS is 0x4
    >>> Tracing instruction at 0x1000002, instruction size = 0x4
    >>> --- EFLAGS is 0x10
    >>> Emulation done. Below is the CPU context
    >>> ECX = 0x1235
    >>> EDX = 0x788f
    >>> XMM0 = 0x00112233445566778899aabbccddeeff
    >>> Read 4 bytes from [0x1000000] = 0xf664a41
    

Go

  • Go语言需要安装package

    go get github.com/unicorn-engine/unicorn/bindings/go/unicorn
    
  • 示例代码如下

    package main
    
    import (
    	"fmt"
    	"github.com/unicorn-engine/unicorn/bindings/go/unicorn"
    )
    
    // main emulates a single 32-bit x86 instruction (mov eax, 1234) and prints
    // the resulting EAX value. Any error reported by the unicorn binding aborts
    // the program via panic instead of being silently discarded.
    func main() {
    	un, err := unicorn.NewUnicorn(unicorn.ARCH_X86, unicorn.MODE_32)
    	if err != nil {
    		panic(err)
    	}
    	// 184 is opcode 0xb8 (mov eax, imm32); 210, 4, 0, 0 is 1234 in little-endian.
    	code := []byte{184, 210, 4, 0, 0} // mov eax,1234
    	// Map 0x1000 bytes at address 0x1000 and place the code there.
    	if err := un.MemMap(0x1000, 0x1000); err != nil {
    		panic(err)
    	}
    	if err := un.MemWrite(0x1000, code); err != nil {
    		panic(err)
    	}
    	// Run from the start of the code to the address just past its last byte.
    	if err := un.Start(0x1000, 0x1000+uint64(len(code))); err != nil {
    		panic(err)
    	}
    	eax, err := un.RegRead(unicorn.X86_REG_EAX)
    	if err != nil {
    		panic(err)
    	}
    	fmt.Println(eax)
    }
    

    最终输出结果:1234。注意这里是十进制

其他unicorn示例

还有其他更多的python示例,也包含其他编程语言的示例,其中Go、Java、ruby、rust、pascal等的示例代码,可以参考链接:

unicorn/bindings at master · unicorn-engine/unicorn

API参考

C语言函数定义在unicorn.h头文件中,Python函数定义在unicorn_const.py和unicorn.py中,函数和定义简短,用时再看也来得及。

总结

简单来说,可以把unicorn理解成一个CPU,把需要执行的代码片段和内存空间布局好,unicorn会执行代码片段,并返回结果。unicorn可以模拟执行多种架构的指令,比如ARM、x86、MIPS等,并且有多种语言的API接口,其中我比较喜欢用的是Python、C和Go,可以根据自己喜欢的语言基于unicorn进行开发,写出自己的一些工具,比如fuzzer、恶意代码分析工具、二进制插桩、加密算法分析等。