diff --git a/images/reversing_guardianrs1/VMArch.png b/images/reversing_guardianrs1/VMArch.png new file mode 100644 index 0000000..99aeeec Binary files /dev/null and b/images/reversing_guardianrs1/VMArch.png differ diff --git a/images/reversing_guardianrs1/calc_entry.png b/images/reversing_guardianrs1/calc_entry.png new file mode 100644 index 0000000..f61631d Binary files /dev/null and b/images/reversing_guardianrs1/calc_entry.png differ diff --git a/images/reversing_guardianrs1/calc_function.png b/images/reversing_guardianrs1/calc_function.png new file mode 100644 index 0000000..74fb9ad Binary files /dev/null and b/images/reversing_guardianrs1/calc_function.png differ diff --git a/images/reversing_guardianrs1/calc_function_asm.png b/images/reversing_guardianrs1/calc_function_asm.png new file mode 100644 index 0000000..e8ac349 Binary files /dev/null and b/images/reversing_guardianrs1/calc_function_asm.png differ diff --git a/images/reversing_guardianrs1/disassembled_bytecode_sample.png b/images/reversing_guardianrs1/disassembled_bytecode_sample.png new file mode 100644 index 0000000..eed1c17 Binary files /dev/null and b/images/reversing_guardianrs1/disassembled_bytecode_sample.png differ diff --git a/images/reversing_guardianrs1/function_bytecode.png b/images/reversing_guardianrs1/function_bytecode.png new file mode 100644 index 0000000..fe9958f Binary files /dev/null and b/images/reversing_guardianrs1/function_bytecode.png differ diff --git a/images/reversing_guardianrs1/vmentry_firstblock.png b/images/reversing_guardianrs1/vmentry_firstblock.png new file mode 100644 index 0000000..f79bd89 Binary files /dev/null and b/images/reversing_guardianrs1/vmentry_firstblock.png differ diff --git a/images/reversing_guardianrs1/vmentry_secondblock.png b/images/reversing_guardianrs1/vmentry_secondblock.png new file mode 100644 index 0000000..8648874 Binary files /dev/null and b/images/reversing_guardianrs1/vmentry_secondblock.png differ diff --git 
a/misc/reversing_guardianrs1/disassembler.py b/misc/reversing_guardianrs1/disassembler.py new file mode 100644 index 0000000..a418494 --- /dev/null +++ b/misc/reversing_guardianrs1/disassembler.py @@ -0,0 +1,153 @@ +from enum import IntEnum +import struct +import argparse + +def parse_args() : + parser = argparse.ArgumentParser(description="Disassembles Guardian-rs VM bytecode") + + parser.add_argument("--print-pc", "-ppc", action="store_true", default=False, + help="Print the program counter (default: False)") + parser.add_argument("--in", "-i", dest="infile", type=str, required=True, + help="Input bytecode file path") + parser.add_argument("--out", "-o", dest="outfile", type=str, required=True, + help="Output file path") + + args = parser.parse_args() + return args + +class Opcode(IntEnum): + Const = 0 + Load = 1 + LoadXmm = 2 + Store = 3 + StoreXmm = 4 + StoreReg = 5 + StoreRegZx = 6 + Add = 7 + Sub = 8 + Div = 9 + IDiv = 10 + Shr = 11 + Mul = 12 + IMul = 13 + And = 14 + Or = 15 + Xor = 16 + Not = 17 + Cmp = 18 + RotR = 19 + RotL = 20 + Jmp = 21 + Vmctx = 22 + VmAdd = 23 + VmMul = 24 + VmSub = 25 + VmReloc = 26 + VmExec = 27 + VmExit = 28 + + +class OpSize(IntEnum): + Byte = 1 + Word = 2 + Dword = 4 + Qword = 8 + + +class JmpCond(IntEnum): + Jmp = 0 + Je = 1 + Jne = 2 + Jbe = 3 + Ja = 4 + Jae = 5 + Jle = 6 + Jg = 7 + + +class Register(IntEnum): + Rax = 0 + Rcx = 1 + Rdx = 2 + Rbx = 3 + Rsp = 4 + Rbp = 5 + Rsi = 6 + Rdi = 7 + R8 = 8 + R9 = 9 + R10 = 10 + R11 = 11 + R12 = 12 + R13 = 13 + R14 = 14 + R15 = 15 + +def disassemble(program, args): + s = [] + pc = 0 + last_instr = None + + while pc < len(program): + addr_str = f"0x{format(pc, 'x')}: " + + op = Opcode(program[pc]) + op_size = OpSize(program[pc + 1]) + + pc += 2 + + if args.print_pc : + s.append(addr_str) + s.append(op.name) + s.append(op_size.name[0]) + + #jmp modifies pc as well, was too lazy to implement + + if op == Opcode.VmExec: + instr_size = OpSize(program[pc]) + pc += 1 + pc += instr_size + + if 
op == Opcode.Const or op == Opcode.VmReloc: + value = 0 + if op_size == OpSize.Qword: + value = struct.unpack_from(' diff --git a/pages/reversing_guardianrs1.html b/pages/reversing_guardianrs1.html new file mode 100644 index 0000000..e7ad691 --- /dev/null +++ b/pages/reversing_guardianrs1.html @@ -0,0 +1,62 @@ + + +
+

Reverse engineering Guardian-rs's virtualization

+ + + +
+

I had always wanted to analyze a binary file protected with virtualization, and I've read a few papers on the subject (some of which I will link at the end of the article). But it always seemed way too hard for me, especially with all of the obfuscation and code mutation that commercial virtualizers use.

I needed something I could learn from that didn't add much obfuscation to the PE. Introducing Guardian-rs.

This open-source project is written in Rust and implements a simple stack-based VM.

Let's begin !

+

+ Protecting a function +

+

I created this simple test function that will be virtualized :

+

+ +

+

Notice the #pragma optimize("", off): this way, the compiler doesn't optimize the function away. +

Loading up the code in IDA I can see the following assembly code for the calc function :

+

+ +

+

So far, so good.

After protecting the function with Guardian-rs (which was actually pretty fast) :

+

+ +

+

We can see that the code of the function got replaced with a jmp preceded by a push.

What could this possibly mean ??

+

+ Understanding the VM structure +

+

A typical virtual machine follows this architecture (I stole the image from msreverseengineering's blog):

+

+ +

+

Following this pattern, we can step into the VMEntry function and continue our analysis:

+

+ +

+

We can see that the flags and the registers are pushed onto the stack to keep the program state as it was.

Then the VM initializes its internal structures, chiefly the Machine structure, which holds:

  • The program counter
  • The stack pointer
  • The registers
  • The flags

Note that the VM allocates its context by invoking the NtAllocateVirtualMemory syscall.

Then the VM pops the registers and copies them into its internal context structure; this way, the interpreted bytecode inherits the "normal" context of the program.

+

+ +

+

And finally, the VM calls the VMDispatcher. The role of this function is to read, decode and interpret the opcodes of the function. In its most primitive implementation, it's as simple as a function pointer.

Before calling the function, the VM pushes the address of the first instruction to be executed onto the stack. This address is the offset pushed by the calc function (just before the jmp) plus the base address of the program (fetched using the Thread Environment Block).

Following this address we can find the bytecode for our function !

+

+ +

+

+

+ Writing a bytecode disassembler +

+

At this stage, we would normally have to reverse engineer the VMDispatcher to understand how the bytecode is encoded, but I decided to cheat instead and look at the source code of the virtualizer (lol).

The bytecode follows this simple format :

Mnemonic (1 byte) | Size of the data (1 byte) | Argument (1 byte or more)
0x16 (Vmctx) | 0x08 (Qword) | None
0x00 (Const) | 0x08 (Qword) | 0x20 (Rdx)

Following this example, we can retrieve the first instructions of our function !

Using this simple logic and by looking at the existing code of Guardian-rs I could write a disassembler (code here ).

It gave me the following code :

+

+ +

+

We can see a few Vmctx instructions, each of which pushes a pointer to the Machine struct onto the stack.

Basically, in 8 lines it has copied the contents of Rdx and Rsp to the top of the stack.

+

+ Conclusion and todo list +

+

Todo :

  • Translate the disassembly into Intel x86 assembly
  • Automate the process
  • Optimize the code

There is still work to be done, but I'm pretty proud of what I did, especially considering it's my first time reversing a VM.

I want to thank the author of the Guardian-rs project, who kindly responded to my questions, as well as the people who wrote the articles I based my work on:

To be continued...

+

+ + + \ No newline at end of file diff --git a/python_script/generate_article.py b/python_script/generate_article.py index 239bb69..ed20911 100644 --- a/python_script/generate_article.py +++ b/python_script/generate_article.py @@ -72,11 +72,12 @@ def generate_image(path) : write_file(base) def set_foldername(name) : - os.mkdir("../images/"+name) + try : os.mkdir("../images/"+name) + except: pass onlyfiles = [f for f in listdir(".") if isfile(join(".", f))] for f in onlyfiles : if f.endswith(".jpg") or f.endswith(".jpeg") or f.endswith(".png") or f.endswith(".ico") or f.endswith(".gif") : - shutil.move(f, "../images/"+name+"/"+f) + shutil.copy(f, "../images/"+name+"/"+f) def main() : Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing