add: latest article and bug fix to the article generator

This commit is contained in:
2024-10-21 22:49:56 +02:00
parent c30e4dae97
commit 324c288299
12 changed files with 238 additions and 2 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 42 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

View File

@@ -0,0 +1,153 @@
from enum import IntEnum
import struct
import argparse
def parse_args():
    """Parse the disassembler's command-line options.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``, carrying
        ``print_pc`` (bool), ``infile`` (str) and ``outfile`` (str).
    """
    cli = argparse.ArgumentParser(
        description="Disassembles Guardian-rs VM bytecode",
    )
    # Optional flag: prefix every listing line with its bytecode offset.
    cli.add_argument(
        "--print-pc", "-ppc",
        default=False,
        action="store_true",
        help="Print the program counter (default: False)",
    )
    # Both paths are mandatory; "in" is a Python keyword, hence the explicit dest.
    cli.add_argument(
        "--in", "-i",
        required=True,
        type=str,
        dest="infile",
        help="Input bytecode file path",
    )
    cli.add_argument(
        "--out", "-o",
        required=True,
        type=str,
        dest="outfile",
        help="Output file path",
    )
    return cli.parse_args()
class Opcode(IntEnum):
    """Instruction opcodes of the VM bytecode.

    Each member's integer is the opcode byte value; values are written in
    hex so they can be compared directly against a hex dump.
    """

    Const = 0x00
    Load = 0x01
    LoadXmm = 0x02
    Store = 0x03
    StoreXmm = 0x04
    StoreReg = 0x05
    StoreRegZx = 0x06
    Add = 0x07
    Sub = 0x08
    Div = 0x09
    IDiv = 0x0A
    Shr = 0x0B
    Mul = 0x0C
    IMul = 0x0D
    And = 0x0E
    Or = 0x0F
    Xor = 0x10
    Not = 0x11
    Cmp = 0x12
    RotR = 0x13
    RotL = 0x14
    Jmp = 0x15
    Vmctx = 0x16
    VmAdd = 0x17
    VmMul = 0x18
    VmSub = 0x19
    VmReloc = 0x1A
    VmExec = 0x1B
    VmExit = 0x1C
class OpSize(IntEnum):
    """Operand sizes; each member's value is the size in bytes."""

    Byte = 1 << 0
    Word = 1 << 1
    Dword = 1 << 2
    Qword = 1 << 3
class JmpCond(IntEnum):
    """Jump condition codes.

    NOTE(review): presumably the condition selector that accompanies
    ``Opcode.Jmp``; nothing in the visible code decodes it yet -- confirm.
    """

    Jmp = 0x0
    Je = 0x1
    Jne = 0x2
    Jbe = 0x3
    Ja = 0x4
    Jae = 0x5
    Jle = 0x6
    Jg = 0x7
class Register(IntEnum):
    """General-purpose register indices, numbered 0 (Rax) through 15 (R15)."""

    Rax = 0x0
    Rcx = 0x1
    Rdx = 0x2
    Rbx = 0x3
    Rsp = 0x4
    Rbp = 0x5
    Rsi = 0x6
    Rdi = 0x7
    R8 = 0x8
    R9 = 0x9
    R10 = 0xA
    R11 = 0xB
    R12 = 0xC
    R13 = 0xD
    R14 = 0xE
    R15 = 0xF
# struct format codes for little-endian unsigned immediates, keyed by the
# immediate's size in bytes.
_IMMEDIATE_FORMATS = {1: '<B', 2: '<H', 4: '<I', 8: '<Q'}


def _format_operand(value, follows_vmctx):
    """Return the operand text (including its leading space) for an immediate."""
    if follows_vmctx:
        # NOTE(review): presumably the immediate is a byte offset into the VM
        # context, with register slots of 8 bytes starting at offset 16 --
        # confirm against the Guardian-rs Machine layout.
        try:
            return f" {Register((value - 16) // 8).name}"
        except ValueError:
            # The offset does not map onto a known register: fall through
            # and show the raw value instead of aborting the whole listing.
            pass
    return f" 0x{value:x}"


def disassemble(program, args):
    """Render VM bytecode as a textual listing.

    Every instruction starts with an opcode byte followed by an operand-size
    byte. ``Const`` and ``VmReloc`` are followed by a little-endian immediate
    of that size; ``VmExec`` is followed by a length byte and that many raw
    bytes, which are skipped without being decoded. Decoding stops at the
    first ``VmExit`` or at the end of ``program``.

    Args:
        program: Indexable bytes-like object (e.g. ``bytearray``) holding the
            bytecode.
        args: Object with a boolean ``print_pc`` attribute; when true, each
            line is prefixed with the instruction's offset in hex.

    Returns:
        The listing as one string: mnemonic, first letter of the operand
        size, then the operand if any. Lines are newline-terminated, except
        the ``VmExit`` line that ends the listing.

    Raises:
        ValueError: If an opcode or operand-size byte is not a known value.
        IndexError: If the bytecode ends in the middle of an instruction
            header.
        struct.error: If the bytecode ends in the middle of an immediate.
    """
    pieces = []
    pc = 0
    last_instr = None
    while pc < len(program):
        instr_addr = pc
        op = Opcode(program[pc])
        op_size = OpSize(program[pc + 1])
        pc += 2

        if args.print_pc:
            pieces.append(f"0x{instr_addr:x}: ")
        pieces.append(op.name)
        pieces.append(op_size.name[0])

        # TODO: jmp modifies pc as well; not implemented yet.
        if op == Opcode.VmExec:
            # The byte here is a plain byte count, not an operand size, so it
            # must not be validated through OpSize (which only accepts
            # 1/2/4/8 and would reject any other length).
            pc += 1 + program[pc]

        if op == Opcode.Const or op == Opcode.VmReloc:
            value = struct.unpack_from(
                _IMMEDIATE_FORMATS[int(op_size)], program, pc)[0]
            pc += op_size
            # Show the decoded immediate itself (not the program counter).
            pieces.append(_format_operand(value, last_instr == Opcode.Vmctx))

        if op == Opcode.VmExit:
            break

        pieces.append('\n')
        last_instr = op
    return ''.join(pieces)
# Script body: read the bytecode file, disassemble it, write the listing.
args = parse_args()

# The whole input is loaded into memory before decoding starts.
with open(args.infile, mode='rb') as file:
    program = bytearray(file.read())

print(f"processing {args.infile}...")
output = disassemble(program, args)

print(f"writing output to {args.outfile}...")
with open(args.outfile, mode="w") as out:
    out.write(output)

print("done.")

View File

@@ -5,6 +5,26 @@
<!--Articles--> <!--Articles-->
<ul> <ul>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_guardianrs1.html" class="article">Reverse engineering Guardian-rs's virtualization</a></li>
<li><a href="reversing_vac_winapi_hooks.html" class="article">reversing VAC's WinApi hooks</a></li> <li><a href="reversing_vac_winapi_hooks.html" class="article">reversing VAC's WinApi hooks</a></li>
<li><a href="insecure_mode_bypass.html" class="article">-insecure mode bypass in CS:GO</a></li> <li><a href="insecure_mode_bypass.html" class="article">-insecure mode bypass in CS:GO</a></li>
</ul> </ul>

View File

@@ -0,0 +1,62 @@
<!--#include virtual="header.html" -->
<article aria-label="Content" itemscope itemtype="http://schema.org/BlogPosting">
<h1 itemprop="name headline">Reverse engineering Guardian-rs's virtualization</h1>
<time class="mono"> Oct 21, 2024</time>
<main itemprop="articleBody" style="position: relative">
<p>I have always wanted to analyze a binary file protected with virtualization, and I've read a few papers on the subject (some of them are linked at the end of the article). But it always seemed way too hard for me, especially with all of the obfuscation and code mutation that commercial virtualizers use.</p><p>I needed something I could learn from and that didn't add much obfuscation to the PE. Introducing <a href="https://github.com/vmctx/guardian-rs" target="_blank">Guardian-rs</a>.</p><p>This open-source project is written in Rust and implements a simple stack-based VM.</p><p>Let's begin!</p><p></p>
<h2 id="Protecting-a-function">
<a href="#Protecting-a-function">Protecting a function</a>
</h2>
<p>I created this simple test function that will be virtualized :</p>
<p>
<img class="center_image" src="../images/reversing_guardianrs1/calc_function.png" alt="" />
</p>
<p></p><p>Notice the <code class="language-plaintext highlighter-rouge">#pragma optimize("", off)</code>: this way the compiler doesn't optimize out the function.</p>
<p>Loading up the code in IDA, I can see the following assembly code for the <code class="language-plaintext highlighter-rouge">calc</code> function :</p><p></p>
<p>
<img class="center_image" src="../images/reversing_guardianrs1/calc_function_asm.png" alt="" />
</p>
<p></p><p>So far, so good.</p><p>After protecting the function with Guardian-rs (which was actually pretty fast) :</p><p></p>
<p>
<img class="center_image" src="../images/reversing_guardianrs1/calc_entry.png" alt="" />
</p>
<p></p><p>We can see that the code of the function got replaced with a <code class="language-plaintext highlighter-rouge">jmp</code> preceded by a <code class="language-plaintext highlighter-rouge">push</code> .</p><p>What could this possibly mean ??</p><p></p>
<h2 id="Understanding-the-VM-structure">
<a href="#Understanding-the-VM-structure">Understanding the VM structure</a>
</h2>
<p>A typical Virtual machine usually follows this architecture (I stole the image from <a href="https://www.msreverseengineering.com/blog/2018/1/31/finspy-vm-part-2-vm-analysis-and-bytecode-disassembly" target="_blank">msreverseengineering's</a> blog) :</p><p></p>
<p>
<img class="center_image" src="../images/reversing_guardianrs1/VMArch.png" alt="" />
</p>
<p></p><p>Following this pattern, we can enter in the <code class="language-plaintext highlighter-rouge">VMEntry</code> function and continue our analysis :</p><p></p>
<p>
<img class="center_image" src="../images/reversing_guardianrs1/vmentry_firstblock.png" alt="" />
</p>
<p></p><p>We can see that the flags and the registers are pushed onto the stack to keep the program state as it was.</p><p>Then the VM will initialize its internal structures, chiefly the <code class="language-plaintext highlighter-rouge">Machine</code> structure, which holds :</p><ul><li>The program counter</li><li>The stack pointer</li><li>The registers</li><li>The flags</li></ul><p></p><p>Note that the VM uses <code class="language-plaintext highlighter-rouge">syscalls</code> to <code class="language-plaintext highlighter-rouge">NtAllocateVirtualMemory</code> to allocate its context.</p><p>Then the VM pops the registers and copies them to its internal context structure. This way, the interpreted bytecode can inherit the "normal" context of the program.</p><p></p>
<p>
<img class="center_image" src="../images/reversing_guardianrs1/vmentry_secondblock.png" alt="" />
</p>
<p></p><p>And finally the VM calls the <code class="language-plaintext highlighter-rouge">VMDispatcher</code>. The role of this function is to read, decode and interpret the opcodes of the function. In its most primitive implementation, it's as simple as a function pointer.</p><p>Before calling the function, the VM pushes the address of the first instruction to be executed onto the stack. This address is the offset pushed by the <code class="language-plaintext highlighter-rouge">calc</code> function (just before the <code class="language-plaintext highlighter-rouge">jmp</code>), plus the base address of the program (fetched using the Thread Environment Block).</p><p>Following this address we can find the bytecode for our function!</p><p></p>
<p>
<img class="center_image" src="../images/reversing_guardianrs1/function_bytecode.png" alt="" />
</p>
<p></p>
<h2 id="Writing-a-bytecode-disassembler">
<a href="#Writing-a-bytecode-disassembler">Writing a bytecode disassembler</a>
</h2>
<p>At this stage, we'd have to reverse engineer the <code class="language-plaintext highlighter-rouge">VMDispatcher</code> to understand how the bytecode is encoded but I instead decided to cheat and look at the source code of the virtualizer (lol).</p><p>The bytecode follows this simple format :</p><p></p><p><table><tbody><tr><th>Mnemonic (1 byte)</th><th>Size of the data (1 byte) </th><th>Argument (1 byte or more)</th></tr><tr><td>0x16 (Vmctx)</td><td>0x08 (Qword)</td><td>None</td></tr><tr><td>0x00 (Const)</td><td>0x08 (Qword)</td><td>0x20 (Rdx)</td></tr></tbody></table></p><p></p><p>Following this example, we can retrieve the first instructions of our function !</p><p>Using this simple logic and by looking at the existing code of Guardian-rs I could write a disassembler (code <a href="../misc/reversing_guardianrs1/disassembler.py" target="_blank">here</a> ).</p><p>It gave me the following code :</p><p></p>
<p>
<img class="center_image" src="../images/reversing_guardianrs1/disassembled_bytecode_sample.png" alt="" />
</p>
<p></p><p>We can see a few <code class="language-plaintext highlighter-rouge">Vmctx</code> instructions, each of which pushes a pointer to the <code class="language-plaintext highlighter-rouge">Machine</code> struct onto the stack.</p><p>Basically, in 8 lines it has copied the content of <code class="language-plaintext highlighter-rouge">Rdx</code> and <code class="language-plaintext highlighter-rouge">Rsp</code> to the top of the stack.</p><p></p>
<h2 id="Conclusion-and-todo-list">
<a href="#Conclusion-and-todo-list">Conclusion and todo list</a>
</h2>
<p>Todo :</p><ul><li>Translate the disassembly into Intel x86 assembly</li><li>Automate the process</li><li>Optimize the code</li></ul><p></p><p>There is still work to be done, but I'm pretty proud of what I did, especially considering it's my first time reversing a VM.</p><p>I want to thank the author of the Guardian-rs project, who kindly responded to my questions, and the people who wrote the articles I based my work on :</p><ul><li> <a href="https://www.msreverseengineering.com/blog/2018/1/31/finspy-vm-part-2-vm-analysis-and-bytecode-disassembly" target="_blank">msreverseengineering's blog</a> </li><li> <a href="https://github.com/67-6f-64/AntiOreans-CodeDevirtualizer/blob/main/Masters%20Thesis.pdf" target="_blank">Valdemar Carøe's thesis</a> </li></ul><p></p><p>To be continued...</p>
<p></p></main></article>
<!--#include virtual="footer.html" -->
</body>
</html>

View File

@@ -72,11 +72,12 @@ def generate_image(path) :
write_file(base) write_file(base)
def set_foldername(name) : def set_foldername(name) :
os.mkdir("../images/"+name) try : os.mkdir("../images/"+name)
except: pass
onlyfiles = [f for f in listdir(".") if isfile(join(".", f))] onlyfiles = [f for f in listdir(".") if isfile(join(".", f))]
for f in onlyfiles : for f in onlyfiles :
if f.endswith(".jpg") or f.endswith(".jpeg") or f.endswith(".png") or f.endswith(".ico") or f.endswith(".gif") : if f.endswith(".jpg") or f.endswith(".jpeg") or f.endswith(".png") or f.endswith(".ico") or f.endswith(".gif") :
shutil.move(f, "../images/"+name+"/"+f) shutil.copy(f, "../images/"+name+"/"+f)
def main() : def main() :
Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing