Skip to content

标题: LLVM Pass转储类或结构的内存布局

创建: 2024-12-01 19:55 更新: 2024-12-09 15:05 链接: https://scz.617.cn/unix/202412011955.txt https://bbs.kanxue.com/thread-284643.htm


目录:

☆ 背景介绍
☆ dumpclass.cpp
☆ dumptarget.cpp
☆ 用dumpclass.so处理dumptarget.cpp
☆ pahole
☆ clang -Xclang -fdump-record-layouts
☆ VC有隐藏选项

☆ 背景介绍

有次因故需要了解std::string类型内存布局,简单折腾一番,分享了一篇

《GDB查看结构或类的内存布局及分离终端》 https://scz.617.cn/unix/202411151604.txt

bluerust随即让我看下面这篇

STL容器逆向与实战 - [2023-02-07] https://mp.weixin.qq.com/s/bfzeGbieYWaPS3_iB-gSeg

他的原话是,主要看"llvm pass dump data type"。看了这篇,于我而言,属于"每个字都认识"系列,大概明白其基本原理是啥,但完全不了解所涉及的"LLVM Pass"技术,看过之后,老虎吃天、无处下爪。我不会C++编程,基本未碰上过C++ STL容器逆向需求,不在意上文中那些具体容器的实现细节。我感兴趣的是,如何转储类或结构的内存布局,也就是上文第一部分的内容。原作者有句话,随便简单写个pass来dump,深深刺激了我,别人随便简单弄的东西,代码都给了,我还是不知如何实践。或许有些同道遭遇类似囧境,本文面向"LLVM Pass"小白提供完整可操作示例,聚焦"转储内存布局",是上文降阶后的狗尾续貂、画蛇添足。

☆ dumpclass.cpp

参看


Writing an LLVM Pass (legacy PM version) https://llvm.org/docs/WritingAnLLVMPass.html

Writing an LLVM Pass https://llvm.org/docs/WritingAnLLVMNewPMPass.html


看雪那篇是Legacy格式的"LLVM Pass",此处dumpclass.cpp改写成New格式。支持两个命令行参数,允许成员名中包含相对偏移或绝对偏移,允许过滤类或结构名。


#include <cstdint>
#include <set>
#include <string>
#include <vector>

#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Support/raw_ostream.h"

// Default value of the -substr option; while -substr still equals this value,
// struct names are not filtered.
#define DEFAULTSUBSTR ""

using namespace llvm;

namespace {

// -passmode=<int>: when greater than 0, member offsets are printed relative to
// the outermost struct (absolute); otherwise relative to the enclosing struct.
static cl::opt<int> passmode ( "passmode", cl::desc("absolute offset or not"), cl::value_desc("int"), cl::init(0) );

// -substr=<string>: when it differs from DEFAULTSUBSTR, only structs whose
// name contains this substring are dumped.
static cl::opt<std::string> substr ( "substr", cl::desc("part of struct name"), cl::value_desc("std::string"), cl::init(DEFAULTSUBSTR) );

struct DumpClass : PassInfoMixin {

std::string getTypeName ( Type *type, const DataLayout &data )
{
    if ( type->isIntegerTy() )
    {
        IntegerType    *i   = cast<IntegerType>( type );

        return "uint" + std::to_string( i->getBitWidth() ) + "_t";
    }
    else if ( type->isPointerTy() )
    {
        PointerType    *ptr = cast<PointerType>( type );

        return getTypeName( ptr->getPointerElementType(), data ) + "*";
    }
    else if ( type->isArrayTy() )
    {
        ArrayType      *arr = cast<ArrayType>( type );

        return getTypeName( arr->getArrayElementType(), data ) + "[" + std::to_string( arr->getArrayNumElements() ) + "]";
    }
    else if ( type->isFloatTy() )
    {
        return "float";
    }
    else if ( type->isStructTy() )
    {
        StructType     *stc = cast<StructType>( type );

        return std::string( stc->getStructName() );
    }
    else
    {
        return "unknown_" + std::to_string( data.getTypeAllocSizeInBits( type ) );
    }
}

void dumpType ( int depth, Type *type, const std::string &suffix, const DataLayout *data, unsigned base, int mode )
{
    std::string blank( depth * 4, ' ' );

    if ( type->isStructTy() )
    {
        StructType         *stc = cast<StructType>( type );
        const StructLayout *sl  = data->getStructLayout( stc );

        errs() << blank + stc->getStructName() + "\n" + blank + "{\n";
        for ( size_t i = 0; i < stc->getStructNumElements(); i++ )
        {
            Type       *subType = stc->getStructElementType( i );
            unsigned    offset  = sl->getElementOffset( i );
            unsigned    size    = data->getTypeAllocSize( subType );

            if ( mode > 0 )
            {
                offset += base;
                dumpType( depth+1, subType, std::to_string(offset)+"_"+std::to_string(size), data, offset, mode );
            }
            else
            {
                dumpType( depth+1, subType, std::to_string(offset)+"_"+std::to_string(size), data, 0, mode );
            }
        }
        errs() << blank + "} field_" + suffix + ";\n";
    }
    else
    {
        errs() << blank + getTypeName( type, *data ) + " field_" + suffix + ";\n";
    }
}

void visitor ( Function &F )
{
    if ( F.getName() != "main" )
    {
        return;
    }

    std::set<StructType*>   types;
    const DataLayout       &data    = F.getParent()->getDataLayout();

    for ( auto &B : F )
    {
        for ( auto &I : B )
        {
            if ( auto *A = dyn_cast<AllocaInst>( &I ) )
            {
                Type   *type    = A->getAllocatedType();
                if ( type->isStructTy() )
                {
                    StructType *stc = cast<StructType>( type );

                    if ( stc->isOpaque() )
                    {
                        continue;
                    }
                    std::string struct_name
                                    = std::string( stc->getStructName() );
                    if ( substr != DEFAULTSUBSTR && struct_name.find( substr ) == std::string::npos )
                    {
                        continue;
                    }
                    types.insert( stc );
                }
            }
        }
    }

    int                     index = 0;

    for ( StructType *type : types )
    {
        dumpType( 0, type, std::to_string( index++ ), &data, 0, passmode );
    }
}

PreservedAnalyses run ( Function &F, FunctionAnalysisManager &FAM )
{
    visitor( F );
    return PreservedAnalyses::all();
}

};

}

// Builds the plugin descriptor for this library.
//
// The registration callback hooks DumpClass into a PassBuilder in two ways:
//   - as the named function pass "DumpClass" for textual pipelines
//   - as a function pass added at the pipeline-start extension point
PassPluginLibraryInfo getDumpClassPluginInfo ()
{
    const auto callback = [] ( PassBuilder &PB )
    {
        PB.registerPipelineParsingCallback
        (
            [] ( StringRef Name, FunctionPassManager &FPM, ArrayRef<PassBuilder::PipelineElement> )
            {
                if ( Name == "DumpClass" )
                {
                    FPM.addPass( DumpClass() );
                    return true;
                }
                return false;
            }
        );
        PB.registerPipelineStartEPCallback
        (
            // The second argument (optimization level) is not used.
            [] ( ModulePassManager &MPM, auto )
            {
                FunctionPassManager FPM;

                FPM.addPass( DumpClass() );
                MPM.addPass( createModuleToFunctionPassAdaptor( std::move( FPM ) ) );
            }
        );
    };

    return { LLVM_PLUGIN_API_VERSION, "DumpClass", LLVM_VERSION_STRING, callback };
}

// C-linkage, weak entry point of the plugin; simply forwards to
// getDumpClassPluginInfo().
extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo
llvmGetPassPluginInfo ()
{
    return getDumpClassPluginInfo();
}


从dumpclass.cpp生成dumpclass.so

clang-14 \
    $(llvm-config-14 --cxxflags) \
    -Wall -pipe \
    -fPIC -shared -Wl,-soname,dumpclass.so \
    -O3 -s \
    -o dumpclass.so dumpclass.cpp

后面会演示如何将dumpclass.so用作"LLVM Pass"来转储类或结构的内存布局。

☆ dumptarget.cpp

dumptarget.cpp是假想的目标程序,将来根据dumptarget.cpp转储其中的类或结构。


include

include

include

include

include

// Sample class whose layout is to be dumped: one private string plus two
// public nested containers that main() fills and prints.
class TargetClass
{
private:
    std::string unused;
public:
    std::deque<std::map<int, std::map<std::string, int>>> myDeque;
    std::map<int, std::map<std::string, int>> myMap;
};

int main ( int argc, char * argv[] ) { TargetClass obj;

obj.myMap[1]["one"] = 1;
obj.myMap[2]["two"] = 2;

obj.myDeque.push_back( obj.myMap );

for ( const auto &d : obj.myDeque )
{
    for ( const auto &pair : d )
    {
        std::cout << "Key : " << pair.first << " -> Value : ";
        for ( const auto &innerpair : pair.second )
        {
            std::cout << innerpair.first << " -> " << innerpair.second;
        }
        std::cout << std::endl;
    }
}

return 0;

}

☆ 用dumpclass.so处理dumptarget.cpp

有多种办法加载dumpclass.so,此处演示其中之一,依次执行这两条命令

clang-14 \
    -Wall -pipe -S -emit-llvm \
    -Xclang -disable-O0-optnone \
    -o dumptarget.ll dumptarget.cpp

opt-14 \
    -disable-output \
    -load ./dumpclass.so -load-pass-plugin ./dumpclass.so \
    -passes=DumpClass -passmode=1 -substr="::basic_string" \
    dumptarget.ll 2>&1 | less

先从dumptarget.cpp生成dumptarget.ll,再用dumpclass.so处理dumptarget.ll。正常情况下会得到


class.std::__cxx11::basic_string
{
    struct.std::__cxx11::basic_string::_Alloc_hider
    {
        uint8_t* field_0_8;
    } field_0_8;
    uint64_t field_8_8;
    union.anon
    {
        uint64_t field_16_8;
        uint8_t[8] field_24_8;
    } field_16_16;
} field_0;


尝试不给opt指定passmode、substr参数,观察输出,加强理解。

☆ pahole

pahole也能转储类或结构的内存布局,不如dumpclass.cpp,出于完备性写在此处。

g++ -Wall -pipe -std=c++11 -O0 -g -o dumptarget_dbg dumptarget.cpp

pahole --hex -E -M -C TargetClass dumptarget_dbg | grep -A 25 "class basic_string"

正常情况下会得到


/* typedef string */ class basic_string<char, std::char_traits<char>, std::allocator<char> > {
    struct _Alloc_hider : allocator<char> {
            /* class allocator<char> : public new_allocator<char> {
            public:

                    /* class new_allocator<char> {
                    public:

                    }<ancestor>; */ /*     0     0 */

                    /* XXX last struct has 1 byte of padding */
            }<ancestor>; */ /*     0   0x1 */

            /* XXX last struct has 1 byte of padding */
            /* XXX 65535 bytes hole, try to pack */

            /* typedef pointer -> pointer -> pointer */ char *     _M_p;     /*     0   0x8 */
    }_M_dataplus; /*     0   0x8 */
    /* typedef size_type -> size_type -> size_type -> size_t */ long unsigned int  _M_string_length; /*   0x8   0x8 */
    union {
            char       _M_local_buf[16];                                     /*  0x10  0x10 */
            /* typedef size_type -> size_type -> size_type -> size_t */ long unsigned int _M_allocated_capacity; /*  0x10   0x8 */
    };                                                                       /*  0x10  0x10 */

public:

} unused; /*     0  0x20 */

☆ clang -Xclang -fdump-record-layouts

clang -Xclang -fdump-record-layouts dumptarget.cpp 2> /dev/null | grep -A 10 "0 | class std::basic_string" | less

正常情况下会得到


         0 | class std::basic_string
         0 |   struct std::basic_string::_Alloc_hider _M_dataplus
         0 |     class std::allocator (base) (empty)
         0 |       class __gnu_cxx::new_allocator (base) (empty)
         0 |     std::basic_string::pointer _M_p
         8 |   std::basic_string::size_type _M_string_length
        16 |   union std::basic_string::(anonymous at /usr/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:179:7)
        16 |     char[16] _M_local_buf
        16 |     std::basic_string::size_type _M_allocated_capacity
           | [sizeof=32, dsize=32, align=8,
           |  nvsize=32, nvalign=8]
...


☆ VC有隐藏选项

假设VirtualBaseClass.cpp如下


include

include

// Root of the hierarchy: a single public int member.
class Base
{
public:
    int x;
};

// Inherits Base virtually and adds one public int member.
class Derived1 : virtual public Base
{
public:
    int y;
};

// Inherits Base virtually and adds one public int member.
class Derived2 : virtual public Base
{
public:
    int z;
};

// Derives from both Derived1 and Derived2 and adds one public int member.
class Multiple : public Derived1, public Derived2
{
public:
    int w;
};

// Creates one Multiple object and writes the member inherited from Base.
int __cdecl main ( int argc, char * argv[] )
{
    Multiple instance;

    instance.x = 10;

    return 0;
}

VC编译时有隐藏选项,查看C++类的内存布局

cl /d1reportSingleClassLayoutBase VirtualBaseClass.cpp
cl /d1reportSingleClassLayoutDerived1 VirtualBaseClass.cpp
cl /d1reportSingleClassLayoutDerived2 VirtualBaseClass.cpp
cl /d1reportSingleClassLayoutDerived VirtualBaseClass.cpp (子串匹配)
cl /d1reportSingleClassLayoutMultiple VirtualBaseClass.cpp
cl /d1reportAllClassLayout VirtualBaseClass.cpp (输出太多,慎用)

用ASCII图显示内存布局,向stdout输出,不影响其他编译选项。