add: latest article and bug fix to the article generator
BIN
images/reversing_guardianrs1/VMArch.png
Normal file
|
After Width: | Height: | Size: 33 KiB |
BIN
images/reversing_guardianrs1/calc_entry.png
Normal file
|
After Width: | Height: | Size: 2.4 KiB |
BIN
images/reversing_guardianrs1/calc_function.png
Normal file
|
After Width: | Height: | Size: 38 KiB |
BIN
images/reversing_guardianrs1/calc_function_asm.png
Normal file
|
After Width: | Height: | Size: 29 KiB |
BIN
images/reversing_guardianrs1/disassembled_bytecode_sample.png
Normal file
|
After Width: | Height: | Size: 42 KiB |
BIN
images/reversing_guardianrs1/function_bytecode.png
Normal file
|
After Width: | Height: | Size: 14 KiB |
BIN
images/reversing_guardianrs1/vmentry_firstblock.png
Normal file
|
After Width: | Height: | Size: 18 KiB |
BIN
images/reversing_guardianrs1/vmentry_secondblock.png
Normal file
|
After Width: | Height: | Size: 24 KiB |
153
misc/reversing_guardianrs1/disassembler.py
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
from enum import IntEnum
|
||||||
|
import struct
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
def parse_args():
    """Parse the disassembler's command-line options from ``sys.argv``.

    Returns:
        argparse.Namespace with three attributes:
        ``print_pc`` (bool), ``infile`` (str) and ``outfile`` (str).
    """
    cli = argparse.ArgumentParser(description="Disassembles Guardian-rs VM bytecode")

    # Optional switch: prefix every listed instruction with its offset.
    cli.add_argument(
        "--print-pc", "-ppc",
        action="store_true",
        default=False,
        help="Print the program counter (default: False)",
    )

    # Both paths are mandatory. An explicit dest is given because "in" is a
    # Python keyword and could not be read back as an attribute name.
    required_paths = (
        ("--in", "-i", "infile", "Input bytecode file path"),
        ("--out", "-o", "outfile", "Output file path"),
    )
    for long_flag, short_flag, dest, description in required_paths:
        cli.add_argument(
            long_flag, short_flag,
            dest=dest,
            type=str,
            required=True,
            help=description,
        )

    return cli.parse_args()
|
||||||
|
|
||||||
|
class Opcode(IntEnum):
    """Opcode byte of a VM instruction, numbered consecutively from 0x00.

    The value is the first byte of every encoded instruction; values are
    written in hex so they can be matched directly against a hex dump.
    """

    Const = 0x00
    Load = 0x01
    LoadXmm = 0x02
    Store = 0x03
    StoreXmm = 0x04
    StoreReg = 0x05
    StoreRegZx = 0x06

    Add = 0x07
    Sub = 0x08
    Div = 0x09
    IDiv = 0x0A
    Shr = 0x0B
    Mul = 0x0C
    IMul = 0x0D
    And = 0x0E
    Or = 0x0F
    Xor = 0x10
    Not = 0x11
    Cmp = 0x12
    RotR = 0x13
    RotL = 0x14

    Jmp = 0x15

    Vmctx = 0x16
    VmAdd = 0x17
    VmMul = 0x18
    VmSub = 0x19
    VmReloc = 0x1A
    VmExec = 0x1B
    VmExit = 0x1C
|
||||||
|
|
||||||
|
|
||||||
|
class OpSize(IntEnum):
    """Operand-size tag (second byte of an instruction).

    Each member's value is the operand width in bytes, so a member can be
    added directly to a byte offset.
    """

    Byte = 1 << 0
    Word = 1 << 1
    Dword = 1 << 2
    Qword = 1 << 3
|
||||||
|
|
||||||
|
|
||||||
|
class JmpCond(IntEnum):
    """Jump condition codes, numbered consecutively from 0.

    Not referenced by ``disassemble`` in this file, which does not decode
    ``Jmp`` operands.
    """

    Jmp = 0x0
    Je = 0x1
    Jne = 0x2
    Jbe = 0x3
    Ja = 0x4
    Jae = 0x5
    Jle = 0x6
    Jg = 0x7
|
||||||
|
|
||||||
|
|
||||||
|
class Register(IntEnum):
    """Index of a 64-bit general-purpose register, 0x0 (Rax) to 0xF (R15).

    ``disassemble`` maps a VM-context offset to one of these indices to print
    a register name instead of a raw offset.
    """

    Rax = 0x0
    Rcx = 0x1
    Rdx = 0x2
    Rbx = 0x3
    Rsp = 0x4
    Rbp = 0x5
    Rsi = 0x6
    Rdi = 0x7

    R8 = 0x8
    R9 = 0x9
    R10 = 0xA
    R11 = 0xB
    R12 = 0xC
    R13 = 0xD
    R14 = 0xE
    R15 = 0xF
|
||||||
|
|
||||||
|
def disassemble(program, args):
    """Disassemble Guardian-rs VM bytecode into a text listing.

    Every instruction starts with an opcode byte followed by an operand-size
    byte. ``Const`` and ``VmReloc`` are followed by a little-endian unsigned
    immediate of that size. ``VmExec`` is followed by a length byte and that
    many raw bytes, which are skipped (presumably the native instruction to
    execute -- confirm against the virtualizer source). Decoding stops after
    the first ``VmExit`` or at the end of ``program``.

    Args:
        program: Bytes-like object (``bytes``/``bytearray``) with the bytecode.
        args: Object with a boolean ``print_pc`` attribute; when true, each
            instruction is prefixed with its hexadecimal offset.

    Returns:
        str: One instruction per line, formatted as
        ``<name><size letter>[ <operand>]``. The final ``VmExit`` line has no
        trailing newline.

    Raises:
        ValueError: If an opcode or operand-size byte is not a known value.
        IndexError: If the input ends in the middle of an instruction header.
        struct.error: If the input ends in the middle of an immediate.
    """
    # struct format for each immediate width (little-endian, unsigned).
    immediate_formats = {
        OpSize.Byte: '<B',
        OpSize.Word: '<H',
        OpSize.Dword: '<I',
        OpSize.Qword: '<Q',
    }

    s = []
    pc = 0
    last_instr = None

    while pc < len(program):
        # Captured before decoding so the prefix is the instruction's start.
        addr_str = f"0x{pc:x}: "

        op = Opcode(program[pc])
        op_size = OpSize(program[pc + 1])
        pc += 2

        if args.print_pc:
            s.append(addr_str)
        s.append(op.name)
        s.append(op_size.name[0])  # B / W / D / Q suffix

        # NOTE: Jmp also carries extra bytes that are not decoded here, so a
        # listing that contains jumps will desynchronize after the first one.

        if op == Opcode.VmExec:
            # The length byte is a plain byte count, not an OpSize tag:
            # wrapping it in OpSize() raised ValueError for any length other
            # than 1, 2, 4 or 8.
            instr_size = program[pc]
            pc += 1 + instr_size

        if op in (Opcode.Const, Opcode.VmReloc):
            value = struct.unpack_from(immediate_formats[op_size], program, pc)[0]
            pc += op_size

            # Default rendering: the immediate itself (the previous code
            # printed the program counter here by mistake).
            operand = f"0x{value:x}"

            if last_instr == Opcode.Vmctx:
                # A constant right after Vmctx is treated as an offset into
                # the VM context; presumably the register array starts at
                # offset 16 with 8-byte slots -- confirm against the Machine
                # struct layout. Offsets outside the 16 registers keep the
                # hexadecimal rendering instead of raising.
                try:
                    operand = Register((value - 16) // 8).name
                except ValueError:
                    pass

            s.append(f" {operand}")

        if op == Opcode.VmExit:
            break

        s.append('\n')
        last_instr = op

    return ''.join(s)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the CLI only when executed as a script, so importing this module
    # (for example to reuse disassemble) does not parse sys.argv or touch
    # any file.
    args = parse_args()

    # The bytecode is raw binary: read it unmodified.
    with open(args.infile, 'rb') as file:
        program = bytearray(file.read())

    print(f"processing {args.infile}...")
    output = disassemble(program, args)

    print(f"writing output to {args.outfile}...")
    with open(args.outfile, "w") as out:
        out.write(output)

    print("done.")
|
||||||
@@ -5,6 +5,26 @@
|
|||||||
|
|
||||||
<!--Articles-->
|
<!--Articles-->
|
||||||
<ul>
|
<ul>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
|
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
|
||||||
<li><a href="reversing_vac_winapi_hooks.html" class="article">reversing VAC's WinApi hooks</a></li>
|
<li><a href="reversing_vac_winapi_hooks.html" class="article">reversing VAC's WinApi hooks</a></li>
|
||||||
<li><a href="insecure_mode_bypass.html" class="article">-insecure mode bypass in CS:GO</a></li>
|
<li><a href="insecure_mode_bypass.html" class="article">-insecure mode bypass in CS:GO</a></li>
|
||||||
</ul>
|
</ul>
|
||||||
|
|||||||
62
pages/reversing_guardianrs1.html
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
|
||||||
|
<!--#include virtual="header.html" -->
|
||||||
|
<article aria-label="Content" itemscope itemtype="http://schema.org/BlogPosting">
|
||||||
|
<h1 itemprop="name headline">Reverse engineering Guardian-rs's virtualization</h1>
|
||||||
|
|
||||||
|
<time class="mono"> Oct 21, 2024</time>
|
||||||
|
|
||||||
|
<main itemprop="articleBody" style="position: relative">
|
||||||
|
<p>I have always wanted to analyze a binary file protected with virtualization, and I've read a few papers on the subject (some of which are linked at the end of the article). But it always seemed way too hard for me, especially with all of the obfuscation and code mutation that commercial virtualizers use.</p><p>I needed something I could learn from and that didn't add much obfuscation to the PE. Enter <a href="https://github.com/vmctx/guardian-rs" target="_blank">Guardian-rs</a>.</p><p>This open-source project is written in Rust and implements a simple stack-based VM.</p><p>Let's begin!</p><p></p>
|
||||||
|
<h2 id="Protecting-a-function">
|
||||||
|
<a href="#Protecting-a-function">Protecting a function</a>
|
||||||
|
</h2>
|
||||||
|
<p>I created this simple test function that will be virtualized :</p>
|
||||||
|
<p>
|
||||||
|
<img class="center_image" src="../images/reversing_guardianrs1/calc_function.png" alt="" />
|
||||||
|
</p>
|
||||||
|
<p></p><p>Notice the <code class="language-plaintext highlighter-rouge">#pragma optimize("", off)</code>: this way, the compiler doesn't optimize the function away.
|
||||||
|
</p><p>Loading the binary in IDA, I can see the following assembly code for the <code class="language-plaintext highlighter-rouge">calc</code> function :</p><p></p>
|
||||||
|
<p>
|
||||||
|
<img class="center_image" src="../images/reversing_guardianrs1/calc_function_asm.png" alt="" />
|
||||||
|
</p>
|
||||||
|
<p></p><p>So far, so good.</p><p>After protecting the function with Guardian-rs (which was actually pretty fast) :</p><p></p>
|
||||||
|
<p>
|
||||||
|
<img class="center_image" src="../images/reversing_guardianrs1/calc_entry.png" alt="" />
|
||||||
|
</p>
|
||||||
|
<p></p><p>We can see that the code of the function got replaced with a <code class="language-plaintext highlighter-rouge">jmp</code> preceded by a <code class="language-plaintext highlighter-rouge">push</code> .</p><p>What could this possibly mean ??</p><p></p>
|
||||||
|
<h2 id="Understanding-the-VM-structure">
|
||||||
|
<a href="#Understanding-the-VM-structure">Understanding the VM structure</a>
|
||||||
|
</h2>
|
||||||
|
<p>A typical Virtual machine usually follows this architecture (I stole the image from <a href="https://www.msreverseengineering.com/blog/2018/1/31/finspy-vm-part-2-vm-analysis-and-bytecode-disassembly" target="_blank">msreverseengineering's</a> blog) :</p><p></p>
|
||||||
|
<p>
|
||||||
|
<img class="center_image" src="../images/reversing_guardianrs1/VMArch.png" alt="" />
|
||||||
|
</p>
|
||||||
|
<p></p><p>Following this pattern, we can enter in the <code class="language-plaintext highlighter-rouge">VMEntry</code> function and continue our analysis :</p><p></p>
|
||||||
|
<p>
|
||||||
|
<img class="center_image" src="../images/reversing_guardianrs1/vmentry_firstblock.png" alt="" />
|
||||||
|
</p>
|
||||||
|
<p></p><p>We can see that the flags and the registers are pushed onto the stack to keep the program state as it was.</p><p>Then the VM will initialize its internal structures, mainly the <code class="language-plaintext highlighter-rouge">Machine</code> structure which mainly holds :</p><ul><li>The program counter</li><li>The stack pointer</li><li>The registers</li><li>The flags</li></ul><p></p><p>Note that the VM uses <code class="language-plaintext highlighter-rouge">syscalls</code> to <code class="language-plaintext highlighter-rouge">NtAllocateVirtualMemory</code> to allocate its context.</p><p>Then the VM pops the registers and copies them to its internal context structure, this way, the interpreted bytecode can inherit from the "normal" context of the program.</p><p></p>
|
||||||
|
<p>
|
||||||
|
<img class="center_image" src="../images/reversing_guardianrs1/vmentry_secondblock.png" alt="" />
|
||||||
|
</p>
|
||||||
|
<p></p><p>And finally the VM calls the <code class="language-plaintext highlighter-rouge">VMDispatcher</code>. The role of this function is to read, decode and interpret the opcodes of the function. In its most primitive implementation, it's as simple as a function pointer.</p><p>Before calling the function, the VM pushes the address of the first instruction to be executed onto the stack. This address is the offset pushed by the <code class="language-plaintext highlighter-rouge">calc</code> function (just before the <code class="language-plaintext highlighter-rouge">jmp</code>), plus the base address of the program (fetched using the Thread Environment Block).</p><p>Following this address, we can find the bytecode for our function!</p><p></p>
|
||||||
|
<p>
|
||||||
|
<img class="center_image" src="../images/reversing_guardianrs1/function_bytecode.png" alt="" />
|
||||||
|
</p>
|
||||||
|
<p></p>
|
||||||
|
<h2 id="Writing-a-bytecode-disassembler">
|
||||||
|
<a href="#Writing-a-bytecode-disassembler">Writing a bytecode disassembler</a>
|
||||||
|
</h2>
|
||||||
|
<p>At this stage, we'd have to reverse engineer the <code class="language-plaintext highlighter-rouge">VMDispatcher</code> to understand how the bytecode is encoded but I instead decided to cheat and look at the source code of the virtualizer (lol).</p><p>The bytecode follows this simple format :</p><p></p><p><table><tbody><tr><th>Mnemonic (1 byte)</th><th>Size of the data (1 byte) </th><th>Argument (1 byte or more)</th></tr><tr><td>0x16 (Vmctx)</td><td>0x08 (Qword)</td><td>None</td></tr><tr><td>0x00 (Const)</td><td>0x08 (Qword)</td><td>0x20 (Rdx)</td></tr></tbody></table></p><p></p><p>Following this example, we can retrieve the first instructions of our function !</p><p>Using this simple logic and by looking at the existing code of Guardian-rs I could write a disassembler (code <a href="../misc/reversing_guardianrs1/disassembler.py" target="_blank">here</a> ).</p><p>It gave me the following code :</p><p></p>
|
||||||
|
<p>
|
||||||
|
<img class="center_image" src="../images/reversing_guardianrs1/disassembled_bytecode_sample.png" alt="" />
|
||||||
|
</p>
|
||||||
|
<p></p><p>We can see a few <code class="language-plaintext highlighter-rouge">Vmctx</code> instructions, each of which pushes a pointer to the <code class="language-plaintext highlighter-rouge">Machine</code> struct onto the stack.</p><p>Basically, in 8 lines it has copied the contents of <code class="language-plaintext highlighter-rouge">Rdx</code> and <code class="language-plaintext highlighter-rouge">Rsp</code> to the top of the stack.</p><p></p>
|
||||||
|
<h2 id="Conclusion-and-todo-list">
|
||||||
|
<a href="#Conclusion-and-todo-list">Conclusion and todo list</a>
|
||||||
|
</h2>
|
||||||
|
<p>Todo :</p><ul><li>Translate the disassembly into Intel x86 assembly</li><li>Automate the process</li><li>Optimize the code</li></ul><p></p><p>There is still work to be done, but I'm pretty proud of what I did, especially considering it's my first time reversing a VM.</p><p>I want to thank the author of the Guardian-rs project, who kindly responded to my questions, and the people who wrote the articles I based my work on :</p><ul><li> <a href="https://www.msreverseengineering.com/blog/2018/1/31/finspy-vm-part-2-vm-analysis-and-bytecode-disassembly" target="_blank">msreverseengineering's blog</a> </li><li> <a href="https://github.com/67-6f-64/AntiOreans-CodeDevirtualizer/blob/main/Masters%20Thesis.pdf" target="_blank">Valdemar Carøe's thesis</a> </li></ul><p></p><p>To be continued...</p>
|
||||||
|
<p></p></main></article>
|
||||||
|
<!--#include virtual="footer.html" -->
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
@@ -72,11 +72,12 @@ def generate_image(path) :
|
|||||||
write_file(base)
|
write_file(base)
|
||||||
|
|
||||||
def set_foldername(name):
    """Copy every image in the current directory into ``../images/<name>/``.

    The destination folder is created if it does not exist yet. Source files
    are copied, not moved, so they stay in the current directory. Only
    regular files ending in .jpg, .jpeg, .png, .ico or .gif are copied.

    Args:
        name: Name of the sub-folder of ``../images`` to populate.

    Raises:
        OSError: If the folder cannot be created for a reason other than
            already existing, or if a copy fails.
    """
    target_dir = "../images/" + name
    try:
        os.mkdir(target_dir)
    except FileExistsError:
        # Re-running the generator for an existing article is expected; any
        # other failure (missing parent, permissions) should surface here
        # rather than later as a confusing copy error.
        pass

    # str.endswith accepts a tuple, replacing the chain of "or" tests.
    image_suffixes = (".jpg", ".jpeg", ".png", ".ico", ".gif")
    for entry in listdir("."):
        if isfile(join(".", entry)) and entry.endswith(image_suffixes):
            shutil.copy(entry, target_dir + "/" + entry)
|
||||||
|
|
||||||
def main() :
|
def main() :
|
||||||
Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
|
Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing
|
||||||
|
|||||||