ADVMP 源码分析与上手

ADVMP 项目介绍

ADVMP（Android Virtual Machine Protection）是一个开源的 Android VMP 实现项目，旨在帮助安全研究人员学习和理解 VMP 的内部原理。与商业 VMP 产品不同，ADVMP 的源码完全公开，代码结构清晰，注释详尽，是学习 VMP 逆向分析的绝佳教材。

为什么要学习 ADVMP

VMP 是 Android 逆向领域中最具挑战性的保护技术之一。直接分析商业 VMP（如阿里聚安全、腾讯御安全）保护的代码，由于缺乏文档且实现复杂，学习曲线非常陡峭。ADVMP 提供了一个"简化版"的 VMP 实现，让你可以：

从源码层面理解 VM 解释器的工作原理：不是猜测，而是看到每一行实现代码
实验性地修改和扩展 VM 指令集：添加自定义指令，观察执行效果
练习 VMP 逆向分析的完整流程：因为你已经知道源码，可以验证自己的分析是否正确
理解商业 VMP 的设计思路：商业 VMP 的核心架构与 ADVMP 类似，只是在此之上增加了更多混淆和反调试手段

源码结构分析

项目目录结构

ADVMP/
├── app/
│   ├── build.gradle
│   └── src/
│       └── main/
│           ├── java/com/advmp/
│           │   ├── MainActivity.java      # 主界面：触发 VMP 执行
│           │   └── NativeLib.java         # JNI 接口声明
│           └── jni/
│               ├── Android.mk             # NDK 构建脚本
│               ├── Application.mk
│               ├── vm_core.c              # VM 解释器核心
│               ├── vm_core.h              # VM 解释器头文件
│               ├── vm_handlers.c          # Handler 实现集合
│               ├── vm_handlers.h
│               ├── vm_compiler.c          # 编译器：原始代码→VM字节码
│               ├── vm_compiler.h
│               ├── vm_disasm.c            # 反汇编器：VM字节码→可读文本
│               ├── vm_disasm.h
│               └── test_functions.c       # 被保护的测试函数
├── README.md
└── docs/
    ├── architecture.md                    # 架构设计文档
    ├── opcode_spec.md                     # 指令集规范
    └── extension_guide.md                 # 扩展开发指南

VM 解释器的核心架构

ADVMP 的 VM 解释器由三个核心组件构成：字节码分发循环、Handler 数组和虚拟寄存器。

虚拟寄存器

ADVMP 定义了一组虚拟寄存器，模拟真实 CPU 的寄存器：

// vm_core.h
#define VM_REG_COUNT  16    // number of virtual registers
#define VM_STACK_SIZE 1024  // capacity of the virtual stack, in 32-bit slots

/*
 * Execution state of one VM instance.
 *
 * Field order and sizes are identical to the previous definition (the stack
 * capacity is merely named now), so the memory layout is unchanged.
 */
typedef struct {
    uint32_t vreg[VM_REG_COUNT];     // 16 virtual registers, 32 bits each
    uint32_t vpc;                    // virtual program counter: byte offset into the bytecode
    uint32_t vsp;                    // virtual stack pointer: index of the next free vstack slot
    uint32_t vflags;                 // flag register (ZF, CF, NF, OF)
    uint32_t vstack[VM_STACK_SIZE];  // virtual stack storage (VM_STACK_SIZE x 32 bit)
} vm_context_t;

虚拟寄存器的映射关系（注意：结构体中另有独立的 vpc、vsp 字段，下文示例 Handler 读写的正是这两个字段，而不是 vreg[15]、vreg[13]）：

虚拟寄存器	对应用途	说明
vreg[0]	函数返回值	存储函数的返回值
vreg[1]-vreg[3]	参数传递	对应 ARM 的 R0-R2（前三个参数）
vreg[4]-vreg[11]	通用寄存器	临时变量存储
vreg[12]	虚拟帧指针	类似 ARM 的 FP
vreg[13]	虚拟栈指针	类似 ARM 的 SP
vreg[14]	虚拟链接寄存器	函数返回地址
vreg[15]	虚拟程序计数器	下一条字节码地址

字节码分发循环

分发循环是 VM 的"调度中心"，负责读取操作码并分发到对应的 Handler：

// vm_core.c - 简化版分发循环
// Signature shared by all opcode handlers: the VM state plus the start of the
// bytecode buffer being executed.
typedef void (*handler_func_t)(vm_context_t *ctx, uint8_t *bytecode);

// Opcode -> handler dispatch table, indexed by the opcode byte.
// Slots that are not listed here (including 0x00, the unused NOP) are
// zero-initialised, i.e. NULL.
static handler_func_t handler_table[256] = {
    [0x01] = vm_handler_add,    // ADD
    [0x02] = vm_handler_sub,    // SUB
    [0x03] = vm_handler_mul,    // MUL
    [0x04] = vm_handler_xor,    // XOR
    [0x05] = vm_handler_and,    // AND
    [0x06] = vm_handler_or,     // OR
    [0x07] = vm_handler_not,    // NOT
    [0x08] = vm_handler_mov,    // MOV
    [0x09] = vm_handler_load,   // LOAD  (from memory)
    [0x0A] = vm_handler_store,  // STORE (to memory)
    [0x0B] = vm_handler_cmp,    // CMP
    [0x0C] = vm_handler_jmp,    // JMP (unconditional jump)
    [0x0D] = vm_handler_jz,     // JZ  (jump if zero)
    [0x0E] = vm_handler_jnz,    // JNZ (jump if not zero)
    [0x0F] = vm_handler_call,   // CALL
    [0x10] = vm_handler_ret,    // RET
    [0x11] = vm_handler_push,   // PUSH
    [0x12] = vm_handler_pop,    // POP
    [0x13] = vm_handler_shl,    // SHL
    [0x14] = vm_handler_shr,    // SHR
    // ... more handlers
};

/*
 * VM entry point: runs `bytecode` from offset 0 until the virtual PC is no
 * longer inside the buffer.
 *
 * ctx          - VM state; its vpc is reset to 0 before execution starts.
 * bytecode     - buffer holding the encoded program.
 * bytecode_len - number of valid bytes in `bytecode`.
 *
 * Returns 0 when the PC leaves the buffer, or -1 when the arguments are
 * invalid or an opcode without a registered handler is encountered.
 */
int vm_execute(vm_context_t *ctx, uint8_t *bytecode, int bytecode_len) {
    if (ctx == NULL || bytecode_len < 0 ||
        (bytecode == NULL && bytecode_len > 0)) {
        return -1;
    }

    // Do the bound check in the unsigned domain explicitly. Comparing the
    // uint32_t PC against a raw int would silently convert a negative length
    // into a huge bound and let the loop read past the buffer.
    const uint32_t limit = (uint32_t)bytecode_len;

    ctx->vpc = 0;  // start at the first byte of the program

    while (ctx->vpc < limit) {
        const uint32_t op_offset = ctx->vpc;
        const uint8_t opcode = bytecode[ctx->vpc++];
        const handler_func_t handler = handler_table[opcode];

        if (handler == NULL) {
            // Unknown opcode: stop instead of guessing how many operand
            // bytes to skip.
            LOGE("Unknown opcode: 0x%02X at offset %u", opcode, (unsigned)op_offset);
            return -1;
        }

        // The handler consumes its own operand bytes by advancing ctx->vpc.
        handler(ctx, bytecode);
    }

    return 0;  // PC left the buffer: normal exit
}

Handler 实现示例

以下是几个典型 Handler 的实现：

ADD Handler（算术加法）

// vm_handlers.c
/*
 * ADD handler: vDst = vSrc1 + vSrc2 (32-bit unsigned arithmetic, wraps).
 *
 * Encoding: [OP_ADD] [vDst_idx] [vSrc1_idx] [vSrc2_idx]
 * Reads three operand bytes starting at ctx->vpc and advances vpc past them.
 *
 * Only ZF is updated; all other flag bits keep their previous value.
 * Register indices are used exactly as encoded (no range check).
 */
void vm_handler_add(vm_context_t *ctx, uint8_t *bytecode) {
    const uint8_t dst  = bytecode[ctx->vpc++];
    const uint8_t src1 = bytecode[ctx->vpc++];
    const uint8_t src2 = bytecode[ctx->vpc++];

    // Snapshot the operands before storing the result: dst may name the same
    // register as a source (e.g. ADD v4, v4, v2), and the trace must show the
    // input values rather than the freshly written sum.
    const uint32_t lhs = ctx->vreg[src1];
    const uint32_t rhs = ctx->vreg[src2];
    const uint32_t sum = lhs + rhs;

    ctx->vreg[dst] = sum;

    // Update the zero flag
    if (sum == 0) {
        ctx->vflags |= FLAG_ZF;
    } else {
        ctx->vflags &= ~FLAG_ZF;
    }

    LOGV("ADD: vReg[%d] = vReg[%d](%u) + vReg[%d](%u) = %u",
         dst, src1, lhs, src2, rhs, sum);
}

CMP Handler（比较）

/*
 * CMP handler: compares two registers and rebuilds the flag register.
 *
 * Format: CMP vSrc1, vSrc2 (two one-byte register indices follow the opcode).
 * vflags is replaced wholesale, so any flag not set below ends up cleared:
 *   ZF - operands are equal
 *   CF - vSrc1 < vSrc2 as unsigned values
 *   NF - vSrc1 < vSrc2 as signed values
 */
void vm_handler_cmp(vm_context_t *ctx, uint8_t *bytecode) {
    const uint8_t lhs_idx = bytecode[ctx->vpc++];
    const uint8_t rhs_idx = bytecode[ctx->vpc++];

    const uint32_t lhs = ctx->vreg[lhs_idx];
    const uint32_t rhs = ctx->vreg[rhs_idx];

    // Build the new flag word locally, then publish it in one store.
    uint32_t flags = 0;
    if (lhs == rhs) {
        flags |= FLAG_ZF;
    }
    if (lhs < rhs) {
        flags |= FLAG_CF;
    }
    if ((int32_t)lhs < (int32_t)rhs) {
        flags |= FLAG_NF;
    }
    ctx->vflags = flags;

    LOGV("CMP: vReg[%d](%u) vs vReg[%d](%u), Flags=0x%X",
         lhs_idx, lhs, rhs_idx, rhs, ctx->vflags);
}

JZ Handler（条件跳转）

/*
 * JZ handler: jumps to an absolute bytecode offset when ZF is set.
 *
 * Format: JZ offset32 (a 4-byte offset follows the opcode, least significant
 * byte first).
 * vpc is always advanced past the operand; it is then overwritten with the
 * target only if the zero flag is set.
 */
void vm_handler_jz(vm_context_t *ctx, uint8_t *bytecode) {
    // Assemble the operand byte by byte. The operand sits at an arbitrary
    // offset inside a byte buffer, so dereferencing it through a uint32_t
    // pointer would be a misaligned (and type-punned) access.
    const uint8_t *operand = bytecode + ctx->vpc;
    const uint32_t offset = (uint32_t)operand[0]
                          | ((uint32_t)operand[1] << 8)
                          | ((uint32_t)operand[2] << 16)
                          | ((uint32_t)operand[3] << 24);
    ctx->vpc += 4;

    if (ctx->vflags & FLAG_ZF) {
        ctx->vpc = offset;  // jump to the encoded offset
        LOGV("JZ: Zero flag set, jump to offset %u", offset);
    } else {
        LOGV("JZ: Zero flag not set, continue");
    }
}

CALL Handler（函数调用）

/*
 * CALL handler: pushes the return address on the virtual stack and jumps.
 *
 * Format: CALL target_offset32 (a 4-byte absolute offset follows the opcode,
 * least significant byte first).
 * The return address is the offset of the byte right after the operand.
 *
 * If the virtual stack is full, nothing is pushed and vpc is set to
 * UINT32_MAX so that a dispatch loop bounded by the bytecode length stops.
 */
void vm_handler_call(vm_context_t *ctx, uint8_t *bytecode) {
    // Byte-wise read: the operand is not guaranteed to be 4-byte aligned.
    const uint8_t *operand = bytecode + ctx->vpc;
    const uint32_t target = (uint32_t)operand[0]
                          | ((uint32_t)operand[1] << 8)
                          | ((uint32_t)operand[2] << 16)
                          | ((uint32_t)operand[3] << 24);
    const uint32_t return_addr = ctx->vpc + 4;
    const uint32_t capacity =
        (uint32_t)(sizeof ctx->vstack / sizeof ctx->vstack[0]);

    if (ctx->vsp >= capacity) {
        // Pushing would write past the end of vstack.
        LOGV("CALL: virtual stack overflow (vsp=%u), halting", ctx->vsp);
        ctx->vpc = UINT32_MAX;
        return;
    }

    // Push the return address, then transfer control to the target.
    ctx->vstack[ctx->vsp++] = return_addr;
    ctx->vpc = target;

    // Log the saved return address, not ctx->vpc (which now holds the target).
    LOGV("CALL: target=0x%X, return_addr=%u", target, return_addr);
}

RET Handler（函数返回）

/*
 * RET handler: pops a return address from the virtual stack into vpc.
 *
 * RET takes no operands. When there is no frame to pop (vsp is 0, which is
 * the case for a RET in the outermost function) or vsp is outside the stack,
 * vpc is set to UINT32_MAX instead, so that a dispatch loop bounded by the
 * bytecode length stops rather than reading outside vstack.
 */
void vm_handler_ret(vm_context_t *ctx, uint8_t *bytecode) {
    (void)bytecode;  // RET has no operand bytes

    const uint32_t capacity =
        (uint32_t)(sizeof ctx->vstack / sizeof ctx->vstack[0]);

    if (ctx->vsp == 0 || ctx->vsp > capacity) {
        LOGV("RET: no caller frame (vsp=%u), halting", ctx->vsp);
        ctx->vpc = UINT32_MAX;
        return;
    }

    // Pop the return address saved by the matching CALL.
    ctx->vpc = ctx->vstack[--ctx->vsp];

    LOGV("RET: return to offset %u", ctx->vpc);
}

VM 编译器

ADVMP 包含一个简单的编译器，将 C 函数转换为 VM 字节码。编译器的工作原理：

// vm_compiler.c
// 编译器将简单的 C 代码翻译为 VM 字节码

// 示例：编译以下 C 函数
// int add_and_xor(int a, int b, int c) {
//     int t = a + b;
//     int r = t ^ c;
//     return r;
// }

// 编译结果（字节码序列）：
// OP_MOV  vReg[4], vReg[1]    ; t = a (参数1 → 临时变量)
// OP_ADD  vReg[4], vReg[4], vReg[2]  ; t = a + b
// OP_MOV  vReg[5], vReg[4]    ; r = t
// OP_XOR  vReg[5], vReg[5], vReg[3]  ; r = r ^ c
// OP_MOV  vReg[0], vReg[5]    ; return r (结果 → 返回值寄存器)
// OP_RET                      ; 返回

编译器的字节码格式

┌─────────────┬───────────────┬───────────────┬───────────────┐
│   Opcode    │  Operand 1    │  Operand 2    │  Operand 3    │
│   (1 byte)  │  (1 byte)     │  (1 byte)     │  (1/4 bytes)  │
└─────────────┴───────────────┴───────────────┴───────────────┘

操作数类型：
- 1 byte: 寄存器编号 (vReg[0] ~ vReg[15])
- 4 bytes: 32 位立即数或地址偏移

编译和运行 ADVMP

环境要求

组件	版本要求
Android NDK	r21+
Android SDK	API 21+
CMake	3.10+
Android Studio	4.0+

编译步骤

# 1. 克隆项目
git clone https://github.com/example/ADVMP.git
cd ADVMP

# 2. 使用 Android Studio 打开项目
# File → Open → 选择 ADVMP 目录

# 3. 编译
# Build → Make Project
# 或命令行编译：
# （在项目根目录下执行，这样后面 adb install 使用的相对路径才是正确的）
./gradlew assembleDebug

运行测试

# 安装 APK
adb install -r app/build/outputs/apk/debug/app-debug.apk

# 运行测试
adb shell am start -n com.advmp/.MainActivity

# 查看日志
adb logcat | grep ADVMP

日志输出示例：

[*] ADVMP VM initialized
[*] Compiling test function: add_and_xor
[*] Bytecode generated: 18 bytes
[*] Executing VM...
[VM] MOV: vReg[4] = vReg[1]  (5)
[VM] ADD: vReg[4] = vReg[4](5) + vReg[2](3) = 8
[VM] MOV: vReg[5] = vReg[4]  (8)
[VM] XOR: vReg[5] = vReg[5](8) ^ vReg[3](6) = 14
[VM] MOV: vReg[0] = vReg[5]  (14)
[VM] RET
[*] VM execution completed, return value: 14
[*] Expected result: 14 ✓

编写自定义 Handler 扩展 VM 指令集

理解 ADVMP 最好的方式是扩展它。下面演示如何添加一个新的 MUL（乘法）Handler。注意：前文 handler_table 示例中 0x03 处已经列出了 MUL，这里为了演示完整的扩展流程，假设该指令尚未实现，并为它分配新的操作码 0x15：

步骤一：定义操作码

在 vm_core.h 中添加新的操作码定义：

// Append after the existing opcode definitions
#define OP_MUL   0x15
#define OP_DIV   0x16
#define OP_MOD   0x17

步骤二：实现 Handler

在 vm_handlers.c 中实现 MUL Handler：

/*
 * MUL handler: vDst = vSrc1 * vSrc2 (32-bit unsigned arithmetic, wraps).
 *
 * Format: MUL vDst, vSrc1, vSrc2 (three one-byte register indices follow the
 * opcode). Reads them starting at ctx->vpc and advances vpc past them.
 *
 * Only ZF is updated; all other flag bits keep their previous value.
 * Register indices are used exactly as encoded (no range check).
 */
void vm_handler_mul(vm_context_t *ctx, uint8_t *bytecode) {
    const uint8_t dst  = bytecode[ctx->vpc++];
    const uint8_t src1 = bytecode[ctx->vpc++];
    const uint8_t src2 = bytecode[ctx->vpc++];

    // Snapshot the operands before storing the result: dst may name the same
    // register as a source, and the trace must show the input values rather
    // than the freshly written product.
    const uint32_t lhs = ctx->vreg[src1];
    const uint32_t rhs = ctx->vreg[src2];
    const uint32_t product = lhs * rhs;

    ctx->vreg[dst] = product;

    // Update the zero flag
    if (product == 0) {
        ctx->vflags |= FLAG_ZF;
    } else {
        ctx->vflags &= ~FLAG_ZF;
    }

    LOGV("MUL: vReg[%d] = vReg[%d](%u) * vReg[%d](%u) = %u",
         dst, src1, lhs, src2, rhs, product);
}

步骤三：注册到 Handler 表

在 vm_core.c 的 handler_table 中注册：

static handler_func_t handler_table[256] = {
    // ... 现有 Handler
    vm_handler_shl,     // OP 0x13: SHL
    vm_handler_shr,     // OP 0x14: SHR
    vm_handler_mul,     // OP 0x15: MUL  ← 新增
    vm_handler_div,     // OP 0x16: DIV  ← 新增
    vm_handler_mod,     // OP 0x17: MOD  ← 新增
};

步骤四：在编译器中支持新指令

在 vm_compiler.c 中添加对 MUL 指令的编译支持：

// Code generation for a multiplication expression node.
// Emits the opcode followed by the result, left and right register operands;
// this is the same order in which the MUL handler reads dst, src1 and src2.
// NOTE(review): presumably each emit_* call appends one byte to `bytecode` - confirm.
case NODE_MUL:
    emit_opcode(bytecode, OP_MUL);
    emit_operand(bytecode, node->result_reg);
    emit_operand(bytecode, node->left->reg);
    emit_operand(bytecode, node->right->reg);
    break;

步骤五：测试新指令

// test_functions.c
// 添加测试函数
void test_mul() {
    vm_context_t ctx = {0};
    uint8_t bytecode[] = {
        OP_MOV,  4, 1,       // vReg[4] = vReg[1]
        OP_MOV,  5, 2,       // vReg[5] = vReg[2]
        OP_MUL,  6, 4, 5,    // vReg[6] = vReg[4] * vReg[5]
        OP_MOV,  0, 6,       // vReg[0] = vReg[6]
        OP_RET               // return
    };
    
    ctx.vreg[1] = 7;  // 参数 a = 7
    ctx.vreg[2] = 8;  // 参数 b = 8
    
    vm_execute(&ctx, bytecode, sizeof(bytecode));
    
    printf("MUL result: %d (expected: 56)\n", ctx.vreg[0]);
    assert(ctx.vreg[0] == 56);
}

ADVMP 与商业 VMP 的差距分析

虽然 ADVMP 是学习 VMP 的好工具，但需要理解它与商业 VMP 之间的差距：

特性	ADVMP	商业 VMP
Handler 表结构	明文的函数指针表	间接跳转表，可能加密
操作码映射	固定映射（0x01=ADD）	每次编译随机化
分发器实现	简单的 while 循环 + 函数指针表查表调用	间接跳转 + 反调试
字节码编码	线性编码，操作数紧随操作码	可能分散存储、加密
指令集复杂度	20+ 条基本指令	100+ 条指令，含特殊指令
反调试	无	多层反调试保护
性能优化	无	字节码缓存、JIT 等
多架构支持	仅 ARM32	ARM32/ARM64/x86/x64

商业 VMP 的额外保护手段

操作码随机化：每次编译生成的字节码，同一个 ADD 操作可能使用不同的操作码
Handler 内联混淆：Handler 内部的代码也经过 OLLVM 混淆
字节码加密：字节码在存储时是加密的，运行时由壳解密
VM 指纹检测：检测是否在虚拟机/模拟器中运行
完整性校验：运行时校验 VM 解释器代码的完整性

使用 ADVMP 学习 VMP 逆向的方法论

方法一：已知答案练习

在 ADVMP 中选择一个测试函数
查看其 C 源码和编译后的字节码
使用 IDA 打开编译好的 SO 文件
假装不知道源码，仅通过 IDA 逆向分析
将分析结果与源码对照，检验正确性

方法二：逐步增加复杂度

先分析最简单的函数（只有 MOV 和 RET）
逐步增加算术运算（ADD、SUB、XOR）
加入循环和条件跳转（CMP、JZ、JNZ）
加入函数调用（CALL、RET）
加入内存访问（LOAD、STORE）
最后分析完整的加密算法实现

方法三：修改后分析

修改 ADVMP 的 Handler 实现（如改变操作码顺序）
添加新的 Handler
使用 IDA 分析修改后的版本
体验不同的 VM 实现对分析难度的影响

方法四：构建自动化工具

编写 IDA 脚本自动识别 Handler
编写字节码反编译工具
构建 Handler 映射表
与 Hyperpwn 等专业工具对比

总结

ADVMP 是学习 VMP 逆向分析的绝佳起点。通过阅读源码理解 VM 解释器的架构，通过扩展 Handler 深入理解 VM 的设计，通过"先理解再分析"的练习方法巩固逆向技能。虽然 ADVMP 相比商业 VMP 简化了很多，但其核心原理是相通的。掌握了 ADVMP 的分析方法后，面对商业 VMP 保护的代码，你将具备坚实的基础和系统的方法论。

本文采用署名-非商业性使用-相同方式共享 4.0 国际许可协议，转载请注明出处。