/*
 * Sample function used as the obfuscation target.
 *
 * Depending on n modulo 4, n is combined with a fixed 32-bit constant
 * using one bitwise/arithmetic operator, and the result is multiplied by
 * a small value derived from n with a different operator.
 *
 * Note: '^' is bitwise XOR in C, not exponentiation.
 */
unsigned int target_function(unsigned int n)
{
    const unsigned int key = 0xBAAAD0BF;

    switch (n % 4) {
    case 0:
        return (n | key) * (2 ^ n);
    case 1:
        return (n & key) * (3 + n);
    case 2:
        return (n ^ key) * (4 | n);
    default: /* n % 4 == 3 */
        return (n + key) * (5 & n);
    }
}

    https://github.com/obfuscator-llvm/obfuscator/wiki/Control-Flow-Flattening
    ../build/bin/clang -m32 target.c -o target_flat -mllvm -fla -mllvm -perFLA=100
    控制流平坦化并不会对指令本身施加混淆和保护，代码依然是可读的，但控制流图（CFG）被破坏了。想要恢复函数原有的 CFG，需要理清各个基本块之间的关系。

    image.png
    由该图可以看出：真实块的后继块就是预处理器，因此只需要找到预处理器就可以了；ret 块是没有后继的块，序言块是没有前驱的块。
    识别各个块的方法是：先根据 CFG 找出没有前驱的序言块和没有后继的 ret 块，再根据主分发器的前驱找到预处理器，找到预处理器之后就可以直接找出真实块。然而，最大的问题在于如何确定真实块之间的逻辑关系。
    解决这个问题的一种方式是符号执行，Miasm 框架和 angr 框架都能起到相应的效果。
    angr 对这个问题的处理方法是：修改临时变量后再执行，就可以得到分支的地址，用这种做法可以得到两个分支的逻辑。执行过程中如果遇到 call 就直接返回；然后检查执行到的块是否在真实块集合里，如果在，就把该真实块的地址返回。最后根据调用块直接 patch。如果存在分支，则针对产生分支的真实块，把 CMOV 指令改成相应的条件跳转指令，跳向符合条件的分支（例如把 CMOVZ 改成 JZ），再在这条指令之后添加一条 JMP 指令，跳向另一分支。

    # Symbolically execute one basic block of the target function with Miasm
    # and report where control flow goes next.

    # Imports from the Miasm framework
    from miasm2.core.bin_stream import bin_stream_str
    from miasm2.arch.x86.disasm import dis_x86_32
    from miasm2.arch.x86.ira import ir_a_x86_32
    from miasm2.arch.x86.regs import all_regs_ids, all_regs_ids_init
    # ExprInt / ExprCond are needed by the isinstance() checks at the end
    from miasm2.expression.expression import ExprCond, ExprInt
    from miasm2.ir.symbexec import symbexec
    from miasm2.expression.simplifications import expr_simp

    # Binary path and offset of the target function
    offset = 0x3e0
    fname = "../src/target"

    # Read the raw bytes of the binary; binary mode avoids any newline or
    # encoding translation, and the context manager closes the file handle.
    with open(fname, "rb") as target_file:
        bin_file = target_file.read()

    # Get Miasm's binary stream
    bin_stream = bin_stream_str(bin_file)

    # Disassemble blocks of the function at 'offset'
    mdis = dis_x86_32(bin_stream)
    disasm = mdis.dis_multibloc(offset)

    # Create target IR object and add all basic blocks to it
    ir = ir_a_x86_32(mdis.symbol_pool)
    for bbl in disasm:
        ir.add_bloc(bbl)

    # Init our symbols with all architecture known registers: each register
    # is paired with the initial value at the same position.
    symbols_init = dict(zip(all_regs_ids, all_regs_ids_init))

    # Create symbolic execution engine
    symb = symbexec(ir, symbols_init)

    # Get the block we want and emulate it;
    # the result is the address of the next block to execute.
    block = ir.get_bloc(offset)
    nxt_addr = symb.emulbloc(block)

    # Run Miasm's simplification engine on the next address
    # to be sure to have the simplest expression.
    simp_addr = expr_simp(nxt_addr)

    if isinstance(simp_addr, ExprInt):
        # Integer expression: offset of the single next basic block
        print("Jump on next basic block: %s" % simp_addr)
    elif isinstance(simp_addr, ExprCond):
        # Conditional expression: two possible destinations
        branch1 = simp_addr.src1
        branch2 = simp_addr.src2
        print("Condition: %s or %s" % (branch1, branch2))
    41. ############################################################################
    42. # Disassemble the target function and collect its relevant blocks.
    43. # The helper is collapsed for clarity; its algorithm is described in the text above.
    44. relevants = get_relevants_blocks()
    45. # Control-flow dictionary: {parent: set(children)}
    46. flow = {}
    47. # Initialise the flow dictionary with an empty set of children per relevant block
    48. for r in relevants: flow[r] = set()
    49. # Main symbolic-execution loop (NOTE: no exit condition is shown in this excerpt)
    50. while True:
    51. block_state = # Placeholder: get the next block state to emulate (elided in this excerpt)
    52. # Unpack the parameters of the current branch:
    53. # "parent_addr" is the parent block variable, as seen earlier
    54. # "symb" is the context (symbols) of the current branch
    55. parent_addr, block_addr, symb = block_state
    56. # Only relevant blocks are keys of "flow"
    57. if block_addr in flow:
    58. # Skip the prologue's parent, as it doesn't exist
    59. if parent_addr != ExprInt32(prologue_parent):
    60. # Link the block to its relevant parent
    61. flow[parent_addr].add(block_addr)
    62. # The block then becomes the new relevant parent
    63. parent_addr = block_addr
    64. # Finally, we can emulate the next block and so on.

    关于控制流平坦化 - 图2