UE4 stats性能埋点

某个Stats文件所统计到的大类(Group Name)如下:

某个Stats文件中,Group Name为Memory的分组所统计到的细项如下:

统计项类型:

为int或float数字类型   // 用于Stat HUD展示,如下图所示

Stat GPU // 显示帧的GPU统计数据   注:Android平台上没有输出

为Memory类型  // 用于Stat HUD展示,如下图所示

Stat Memory // 显示有关虚幻引擎中各个子系统使用多少内存的统计数据

为hierarchy类别,可以嵌套子节点,包含CallCount、InclusiveTime、ExclusiveTime等字段   // 用于Stat HUD展示和Profiler工具CallStack展示  如下图所示

Stat Component   // 显示组件列表及组件性能信息

Profiler工具CallStack展示

本文重点讲述如何用自定义hierarchy类别来埋点,并在Profiler工具的CallStack树中查看数据。

定义分组

DECLARE_STATS_GROUP(TEXT("AI"),STATGROUP_AI, STATCAT_Advanced);   // The three arguments are, in order: Description, GroupName, GroupCategory

// The macro invocation expands to the following struct:
struct FStatGroup_STATGROUP_AI
{
    // Group flags; exposed through the accessor functions below.
    // This variant is enabled by default, compiled in, and not sorted by name.
    enum { DefaultEnable = true, CompileTimeEnable = true, SortByName = false };

    // The group name and category are the stringified 2nd and 3rd macro arguments;
    // the description is the 1st argument as a wide string.
    static __forceinline const char* GetGroupName() { return "STATGROUP_AI"; }
    static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; }
    static __forceinline const TCHAR* GetDescription() { return L"AI"; }
    static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; }
    static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; }
    static __forceinline bool GetSortByName() { return (bool)SortByName; }
};;; // NOTE(review): the extra semicolons are presumably residue of the macro expansion

DECLARE_STATS_GROUP_VERBOSE(TEXT("LoadTimeVerbose"), STATGROUP_LoadTimeVerbose, STATCAT_Advanced);  // Stats collection for this group is off by default

// The macro invocation expands to the following struct:
struct FStatGroup_STATGROUP_LoadTimeVerbose
{
    // Differs from the plain DECLARE_STATS_GROUP expansion only in DefaultEnable = false.
    enum { DefaultEnable = false, CompileTimeEnable = true, SortByName = false };

    static __forceinline const char* GetGroupName() { return "STATGROUP_LoadTimeVerbose"; }
    static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; }
    static __forceinline const TCHAR* GetDescription() { return L"LoadTimeVerbose"; }
    static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; }
    static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; }
    static __forceinline bool GetSortByName() { return (bool)SortByName; }
};;; // NOTE(review): the extra semicolons are presumably residue of the macro expansion

DECLARE_STATS_GROUP_SORTBYNAME(TEXT("Streaming Overview"),STATGROUP_StreamingOverview, STATCAT_Advanced); // Entries are sorted, which costs somewhat more

// The macro invocation expands to the following struct:
struct FStatGroup_STATGROUP_StreamingOverview
{
    // Differs from the plain DECLARE_STATS_GROUP expansion only in SortByName = true.
    enum { DefaultEnable = true, CompileTimeEnable = true, SortByName = true };

    static __forceinline const char* GetGroupName() { return "STATGROUP_StreamingOverview"; }
    static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; }
    static __forceinline const TCHAR* GetDescription() { return L"Streaming Overview"; }
    static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; }
    static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; }
    static __forceinline bool GetSortByName() { return (bool)SortByName; }
};;; // NOTE(review): the extra semicolons are presumably residue of the macro expansion

DECLARE_STATS_GROUP_MAYBE_COMPILED_OUT(TEXT("SlateVeryVerbose"), STATGROUP_SlateVeryVerbose, STATCAT_Advanced, WITH_VERY_VERBOSE_SLATE_STATS);  // Off by default. When WITH_VERY_VERBOSE_SLATE_STATS is 0, the logic for this group's stats is not compiled in

// The macro invocation expands to the following struct:
struct FStatGroup_STATGROUP_SlateVeryVerbose
{
    // CompileTimeEnable takes the value of the 4th macro argument (0 in this expansion).
    enum { DefaultEnable = false, CompileTimeEnable = 0, SortByName = false };

    static __forceinline const char* GetGroupName() { return "STATGROUP_SlateVeryVerbose"; }
    static __forceinline const char* GetGroupCategory() { return "STATCAT_Advanced"; }
    static __forceinline const TCHAR* GetDescription() { return L"SlateVeryVerbose"; }
    static __forceinline bool IsDefaultEnabled() { return (bool)DefaultEnable; }
    static __forceinline bool IsCompileTimeEnable() { return (bool)CompileTimeEnable; }
    static __forceinline bool GetSortByName() { return (bool)SortByName; }
};;; // NOTE(review): the extra semicolons are presumably residue of the macro expansion

定义埋点方式1

在cpp代码的全局区域,定义埋点结构体类型和static全局变量

DECLARE_CYCLE_STAT(TEXT("Test1"), STAT_Test1, STATGROUP_TestGroup); // The three arguments are, in order: Description, stat struct type name, GroupName

// The macro invocation expands to:
struct FStat_STAT_Test1
{
    // Ties this stat to its group struct (FStatGroup_<GroupName>).
    typedef FStatGroup_STATGROUP_TestGroup TGroup;
    static __forceinline const char* GetStatName() { return "STAT_Test1"; }
    static __forceinline const TCHAR* GetDescription() { return L"Test1"; }
    // A cycle stat: stored as int64, cleared every frame, flagged as a cycle counter.
    static __forceinline EStatDataType::Type GetStatType() { return EStatDataType::ST_int64; }
    static __forceinline bool IsClearEveryFrame() { return true; }
    static __forceinline bool IsCycleStat() { return true; }

    // Not a memory stat, so no memory region is associated with it.
    static __forceinline FPlatformMemory::EMemoryCounterRegion GetMemoryRegion()
    {
        return FPlatformMemory::MCR_Invalid;
    }
};;

// The static stat variable; SCOPE_CYCLE_COUNTER / GET_STATID read the stat id from it via GetStatId().
static struct FThreadSafeStaticStat<FStat_STAT_Test1> StatPtr_STAT_Test1;;

在函数中插入埋点

DECLARE_STATS_GROUP(TEXT("TestGroupDesc"), STATGROUP_TestGroup, STATCAT_Advanced); // 定义名为TestGroup的分组

DECLARE_CYCLE_STAT(TEXT("Test1"), STAT_Test1, STATGROUP_TestGroup); // 定义Test1的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test2"), STAT_Test2, STATGROUP_TestGroup); // 定义Test2的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test3"), STAT_Test3, STATGROUP_TestGroup); // 定义Test3的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test4"), STAT_Test4, STATGROUP_TestGroup); // 定义Test4的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test5"), STAT_Test5, STATGROUP_TestGroup); // 定义Test5的埋点类型与static埋点变量,并放在TestGroup分组中 DECLARE_CYCLE_STAT(TEXT("Test6"), STAT_Test6, STATGROUP_TestGroup); // 定义Test6的埋点类型与static埋点变量,并放在TestGroup分组中 // LoopCall(1)在我的电脑耗时约为16ms #define LoopCall(n) { uint64 sum = 1; for (int32 i = 1; i < 10000000*n; i++) { sum *= i; } } void AMyTest1Character::StatTest() { SCOPE_CYCLE_COUNTER(STAT_Test1); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test1((StatPtr_STAT_Test1.GetStatId()));; FPlatformProcess::Sleep(0.002); // 统计到CPU Stall - Sleep
// 条件埋点: 条件成立时,才会埋点成功
int a = 100; CONDITIONAL_SCOPE_CYCLE_COUNTER(STAT_Test2, a > 50); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test2(a > 50 ? (StatPtr_STAT_Test2.GetStatId()) : TStatId());; LoopCall(1); SCOPE_CYCLE_COUNTER(STAT_Test3); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test3((StatPtr_STAT_Test3.GetStatId()));; FPlatformProcess::Sleep(0.005); // 统计到CPU Stall - Sleep { SCOPE_CYCLE_COUNTER(STAT_Test6); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test6((StatPtr_STAT_Test6.GetStatId()));; } FPlatformProcess::Sleep(0.003); // 统计到CPU Stall - Sleep SCOPE_CYCLE_COUNTER(STAT_Test4); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test4((StatPtr_STAT_Test4.GetStatId()));; { SCOPE_CYCLE_COUNTER(STAT_Test5); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test5((StatPtr_STAT_Test5.GetStatId()));; LoopCall(2); } { SCOPE_CYCLE_COUNTER(STAT_Test5); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test5((StatPtr_STAT_Test5.GetStatId()));; FPlatformProcess::SleepNoStats(0.5); // 会被统计到当前作用域埋点的IncTime中 } SCOPE_CYCLE_COUNTER(STAT_Test6); // 宏展开后的代码为:FScopeCycleCounter CycleCount_STAT_Test6((StatPtr_STAT_Test6.GetStatId()));; }

定义埋点方式2

相比方式1,该方式不需要提前定义埋点类型,比较方便

DECLARE_SCOPE_CYCLE_COUNTER(TEXT("UnhashUnreachableObjects"), STAT_UnhashUnreachableObjects, STATGROUP_GC)展开为如下代码:

// Stat struct generated for STAT_UnhashUnreachableObjects; TGroup ties it to the GC group struct.
struct FStat_STAT_UnhashUnreachableObjects
{
    typedef FStatGroup_STATGROUP_GC TGroup;
    static __forceinline const char* GetStatName() { return "STAT_UnhashUnreachableObjects"; }
    static __forceinline const TCHAR* GetDescription() { return L"UnhashUnreachableObjects"; }
    // A cycle stat: stored as int64, cleared every frame, flagged as a cycle counter.
    static __forceinline EStatDataType::Type GetStatType() { return EStatDataType::ST_int64; }
    static __forceinline bool IsClearEveryFrame() { return true; }
    static __forceinline bool IsCycleStat() { return true; }

    // Not a memory stat, so no memory region is associated with it.
    static __forceinline FPlatformMemory::EMemoryCounterRegion GetMemoryRegion()
    {
        return FPlatformMemory::MCR_Invalid;
    }
};;
// The static stat variable that supplies the stat id.
static struct FThreadSafeStaticStat<FStat_STAT_UnhashUnreachableObjects> StatPtr_STAT_UnhashUnreachableObjects;
// The scope counter itself, constructed from that stat id.
FScopeCycleCounter CycleCount_STAT_UnhashUnreachableObjects((StatPtr_STAT_UnhashUnreachableObjects.GetStatId()));;

在函数中,定义埋点结构体类型和static局部变量,并插入埋点

// Excerpt showing where the one-line declare-and-scope macro is placed inside a function.
// The rest of the function body is elided in this excerpt.
bool UnhashUnreachableObjects(bool bUseTimeLimit, float TimeLimit)
{
    // Declares the stat type and static variable and starts the scope counter in one step.
    DECLARE_SCOPE_CYCLE_COUNTER(TEXT("UnhashUnreachableObjects"), STAT_UnhashUnreachableObjects, STATGROUP_GC);
    
    // ... ...
}

定义埋点方式3

在DECLARE_SCOPE_CYCLE_COUNTER基础上封装,放到Quick分组下,更易于使用

QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1);  // Equivalent to: DECLARE_SCOPE_CYCLE_COUNTER(TEXT("STAT_QuickTest1"),STAT_QuickTest1,STATGROUP_Quick)

// Stats defined with QUICK_SCOPE_CYCLE_COUNTER are placed in the group whose GroupName is STATGROUP_Quick (group struct type: FStatGroup_STATGROUP_Quick)

QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1)展开为:

// Stat struct generated for STAT_QuickTest1; TGroup ties it to the Quick group struct.
struct FStat_STAT_QuickTest1
{
    typedef FStatGroup_STATGROUP_Quick TGroup;
    // For the quick variant the description is the same text as the stat name.
    static __forceinline const char* GetStatName() { return "STAT_QuickTest1"; }
    static __forceinline const TCHAR* GetDescription() { return L"STAT_QuickTest1"; }
    // A cycle stat: stored as int64, cleared every frame, flagged as a cycle counter.
    static __forceinline EStatDataType::Type GetStatType() { return EStatDataType::ST_int64; }
    static __forceinline bool IsClearEveryFrame() { return true; }
    static __forceinline bool IsCycleStat() { return true; }

    // Not a memory stat, so no memory region is associated with it.
    static __forceinline FPlatformMemory::EMemoryCounterRegion GetMemoryRegion()
    {
        return FPlatformMemory::MCR_Invalid;
    }
};;
// The static stat variable that supplies the stat id.
static struct FThreadSafeStaticStat<FStat_STAT_QuickTest1> StatPtr_STAT_QuickTest1;
// The scope counter itself, constructed from that stat id.
FScopeCycleCounter CycleCount_STAT_QuickTest1((StatPtr_STAT_QuickTest1.GetStatId()));;

在函数中,定义埋点结构体类型和static局部变量,并插入埋点

// Excerpt showing the quick variant in use; the rest of the function body is elided.
void AMyTest1Character::StatTest()
{
    // Declares the stat (in the Quick group) and starts the scope counter in one step.
    QUICK_SCOPE_CYCLE_COUNTER(STAT_QuickTest1);

    // ... ...
}

对UObject对象埋点

DECLARE_STATS_GROUP(TEXT("TestGroupDesc"), STATGROUP_TestGroup, STATCAT_Advanced); // 定义名为TestGroup的分组

DECLARE_CYCLE_STAT(TEXT("ObjTest1"), STAT_ObjTest1, STATGROUP_TestGroup); // 定义ObjTest1的埋点类型与static埋点变量,并放在TestGroup分组中
DECLARE_CYCLE_STAT(TEXT("ObjTest2"), STAT_ObjTest2, STATGROUP_TestGroup); // 定义ObjTest2的埋点类型与static埋点变量,并放在TestGroup分组中

void AMyTest1Character::StatTest()
{
    FString MyBPObjectPath = TEXT("/Game/ThirdPersonCPP/Blueprints/MyBlueprintObject.MyBlueprintObject_C");
    UClass* MyBPObjectClass = LoadClass<UObject>(nullptr, *MyBPObjectPath);
    UMyBPObject* BPObj1 = NewObject<UMyBPObject>(this, MyBPObjectClass);

    FString TexturePath1 = TEXT("/Engine/EngineMaterials/DefaultDiffuse_TC_Masks");
    UTexture2D* TextureObj1 = LoadObject<UTexture2D>(nullptr, *TexturePath1);

    {
        FScopeCycleCounterUObject ObjScope(MyBPObjectClass);

        LoopCall(1);

        FPlatformProcess::Sleep(0.002);

        {
            FScopeCycleCounterUObject ObjScope2(MyBPObjectClass);

            LoopCall(1);

            {
// 通过GET_STATID宏来获取,会被展开为:(StatPtr_STAT_ObjTest1.GetStatId())
TStatId StatBPObj1 = GET_STATID(STAT_ObjTest1); FScopeCycleCounterUObject ObjScope3(BPObj1, StatBPObj1); FPlatformProcess::SleepNoStats(
0.5); } {
// 通过GET_STATID宏来获取,会被展开为:(StatPtr_STAT_ObjTest2.GetStatId()) FScopeCycleCounterUObject ObjScope4(
this, GET_STATID(STAT_ObjTest2)); FPlatformProcess::Sleep(0.003); } } LoopCall(2); // 动态创建TStatId对象 TStatId StatObjTest3 = FDynamicStats::CreateStatId<FStatGroup_STATGROUP_TestGroup>(FString(TEXT("ObjTest3"))); FScopeCycleCounterUObject ObjScope3(MyBPObjectClass, StatObjTest3); } }

即使对UObject指定了埋点类型,对UObject的统计仍会放到STATGROUP_UObjects分组中,如下所示:

Tickable对象的Tick耗时

// A UObject that is also a tickable game object, used to show how Tick time of
// tickable objects is attributed to a stat id.
UCLASS()
class UMyBPObject : public UObject, public FTickableGameObject
{
    GENERATED_BODY()

public:
    UMyBPObject();
    ~UMyBPObject();

    // Stat id under which this object is reported, declared in STATGROUP_Tickables.
    // To opt out of being tracked, simply `return TStatId();` instead.
    virtual TStatId GetStatId() const override
    {
        RETURN_QUICK_DECLARE_CYCLE_STAT(MyBPObject, STATGROUP_Tickables);
    }

    // Tickable unless this object is a default subobject.
    virtual bool IsTickable() const override
    {
        return !IsDefaultSubobject();
    }

    // Stalls for 0.03 s on every frame whose counter is a multiple of 300; otherwise does nothing.
    virtual void Tick(float DeltaTime) override
    {
        const bool bStallThisFrame = (GFrameCounter % 300 == 0);
        if (!bStallThisFrame)
        {
            return;
        }

        FPlatformProcess::SleepNoStats(0.03);
    }
};


void AMyTest1Character::StatTest()
{
    FString MyBPObjectPath = TEXT("/Game/ThirdPersonCPP/Blueprints/MyBlueprintObject.MyBlueprintObject_C");
    UClass* MyBPObjectClass = LoadClass<UObject>(nullptr, *MyBPObjectPath); // MyBPObjectClass为UBlueprintGeneratedClass*类型

    // 创建UMyBPObject对象,并赋值给成员变量UMyBPObject* m_BPObj1
    m_BPObj1 = NewObject<UMyBPObject>(this, MyBPObjectClass);
}

Tickable对象一般会放在STATGROUP_Tickables分组中。以下为某个Stats文件收集到的Tickable对象名称:

Task执行任务的耗时

DECLARE_STATS_GROUP(TEXT("TestGroupDesc"), STATGROUP_TestGroup, STATCAT_Advanced); // Define the stat group named TestGroup

// Minimal task type for TGraphTask<FMyTestTask>; its stat id is declared in STATGROUP_TestGroup.
class FMyTestTask
{
public:
    FMyTestTask() = default;

    // Name of this task.
    static const TCHAR* GetTaskName() { return TEXT("FMyTestTask"); }

    // Stat id of this task, declared on the spot in the TestGroup group.
    FORCEINLINE static TStatId GetStatId()
    {
        RETURN_QUICK_DECLARE_CYCLE_STAT(FMyTestTask, STATGROUP_TestGroup);
    }

    /** Returns the thread this task should run on. **/
    static ENamedThreads::Type GetDesiredThread() { return ENamedThreads::AnyThread; }

    // Subsequents are tracked for this task.
    static ESubsequentsMode::Type GetSubsequentsMode() { return ESubsequentsMode::TrackSubsequents; }

    // The task's work: a synthetic CPU load (LoopCall is defined elsewhere in this file).
    void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
    {
        LoopCall(5);
    }
};

void AMyTest1Character::StatTest()
{
    FGraphEventRef MyTestTaskEvent = TGraphTask<FMyTestTask>::CreateTask().ConstructAndDispatchWhenReady();
// 当前线程挂起,等待Task任务执行完成 FTaskGraphInterface::Get().WaitUntilTaskCompletes(MyTestTaskEvent); // 统计到CPU Stall - Wait For Event }

TaskGraph的任务一般会放在STATGROUP_TaskGraphTasks和STATGROUP_ThreadPoolAsyncTasks分组中。以下为某个Stats文件收集到的Task名称:

其他统计说明

CPU停转的统计:

Stat系统自己开销的统计:

总结

① 通过定义线程安全的埋点变量,在其构造函数(从变量定义的地方开始记录)与析构函数(结束记录)中来计算埋点变量在生命周期范围的耗时

② 在记录数据时,会带上Thread Id。因此,在Profiler工具中展示数据时,会以线程作为大类进行分类

③ Profiler工具中的CallStack树反映的是埋点变量(作用域)之间的嵌套关系,与代码的函数调用栈(CallStack)没有关系

参考

https://docs.unrealengine.com/4.26/zh-CN/TestingAndOptimization/PerformanceAndProfiling/StatCommands/StatsSystemOverview/

https://docs.unrealengine.com/4.26/en-US/TestingAndOptimization/PerformanceAndProfiling/StatCommands/StatsSystemOverview/

UE高级性能剖析技术(2) -CPU帧率瓶颈和卡顿

原文地址:https://www.cnblogs.com/kekec/p/14960139.html