UE4之Game、Render、RHI多线程架构

游戏线程(GameThread)

GameThread是引擎运行的心脏,承载游戏逻辑、运行流程的工作,也是其它线程的数据发起者。在FEngineLoop::Tick函数执行每帧逻辑的更新。

在引擎启动时会把GameThread的线程id存储到全局变量GGameThreadId中,且稍后会设置到TaskGraph系统中。

// Excerpt of the engine pre-init step: records the game thread id, starts the TaskGraph,
// attaches the calling thread to the GameThread slot and, if enabled, starts the rendering thread.
int32 FEngineLoop::PreInitPreStartupScreen(const TCHAR* CmdLine)
{
    // ... ...
    
    // Set up the TLS caches for the current thread (per the article: FPerThreadFreeBlockLists, needed by the Binned2/Binned3 allocators -- not verifiable from this excerpt)
    FMemory::SetupTLSCachesOnCurrentThread();
    
    // remember thread id of the main thread
    GGameThreadId = FPlatformTLS::GetCurrentThreadId();// game thread id
    GIsGameThreadIdInitialized = true; // marks the game thread id as initialized; IsInGameThread() checks this flag

    FPlatformProcess::SetThreadAffinityMask(FPlatformAffinity::GetMainGameMask()); // apply the main-game CPU affinity mask to the current thread (presumably to stop it hopping between cores -- confirm)
    FPlatformProcess::SetupGameThread(); // platform-specific game thread setup (per the article, an empty implementation on many platforms)
    
    // ... ...
    
    FTaskGraphInterface::Startup(FPlatformMisc::NumberOfCores()); // start the TaskGraph, passing the machine's core count (used to size the worker threads, per the article)
    FTaskGraphInterface::Get().AttachToThread(ENamedThreads::GameThread); // attach the current thread to the TaskGraph's GameThread named slot, linking the game thread to the TaskGraph
    

    if (GUseThreadedRendering)  // a rendering thread is requested
    {
        if (GRHISupportsRHIThread) // the current platform/RHI supports an RHI thread
        {
            const bool DefaultUseRHIThread = true;
            GUseRHIThread_InternalUseOnly = DefaultUseRHIThread;
            if (FParse::Param(FCommandLine::Get(), TEXT("rhithread")))
            {
                GUseRHIThread_InternalUseOnly = true; // -rhithread: use a dedicated RHI thread (per the article it is attached to the TaskGraph's RHIThread slot)
            }
            else if (FParse::Param(FCommandLine::Get(), TEXT("norhithread")))
            {
                GUseRHIThread_InternalUseOnly = false; // -norhithread: explicitly disable the RHI thread
            }
        }
            
        SCOPED_BOOT_TIMING("StartRenderingThread");
        StartRenderingThread();  // create and start the rendering thread
    }
    
    // ... ...
}

游戏线程和TaskGraph系统的ENamedThreads::GameThread其实是一回事,都是同一个线程!

经过上面的初始化和设置后,其它地方就可以通过TaskGraph系统并行地处理任务了,也可以访问全局变量,以便判断游戏线程是否初始化完,当前线程是否游戏线程:

// Returns true only when the game thread id has been recorded and the calling
// thread's id matches it.
bool IsInGameThread()
{
    // Before the id is recorded no thread can be identified as the game thread.
    if (!GIsGameThreadIdInitialized)
    {
        return false;
    }

    return FPlatformTLS::GetCurrentThreadId() == GGameThreadId;
}

渲染线程(RenderThread)

RenderThread在TaskGraph系统中有一个任务队列,其他线程(主要是GameThread)通过宏ENQUEUE_RENDER_COMMAND向该队列中填充任务

RenderThread则不断从这个队列中取出任务来执行,从而生成与平台无关的Command List(渲染指令列表)。注:整个过程是异步的

RenderThread是其他线程(主要是GameThread)的奴隶,只是简单地作为工作线程不断执行它们赋予的工作。 

RenderingThread.h声明了全部对外的接口,部分如下:

// Engine\Source\Runtime\RenderCore\Public\RenderingThread.h

// Whether rendering currently runs in a separate thread. If false, render commands are executed immediately instead of being put into the render command queue.
extern RENDERCORE_API bool GIsThreadedRendering;

// Whether the rendering thread should be created. Usually set by a command line parameter or the ToggleRenderingThread console command.
extern RENDERCORE_API bool GUseThreadedRendering;

// Enables/disables the RHI thread (dedicated thread and/or RHI on task threads, judging by the parameter names).
extern RENDERCORE_API void SetRHIThreadEnabled(bool bEnableDedicatedThread, bool bEnableRHIOnTaskThreads);

(......)

// Starts the rendering thread.
extern RENDERCORE_API void StartRenderingThread();

// Stops the rendering thread.
extern RENDERCORE_API void StopRenderingThread();

// Checks whether the rendering thread is healthy (has not crashed); if it has crashed, the details are reported through UE_LOG.
extern RENDERCORE_API void CheckRenderingThreadHealth();

// Checks whether the rendering thread is healthy (has not crashed).
extern RENDERCORE_API bool IsRenderingThreadHealthy();

// Adds a task that must complete before the next scene draw or before the render commands are flushed.
extern RENDERCORE_API void AddFrameRenderPrerequisite(const FGraphEventRef& TaskToAdd);

// Gathers the frame render prerequisites and makes sure all render commands have been enqueued.
extern RENDERCORE_API void AdvanceFrameRenderPrerequisite();

// Waits until the rendering thread has executed all pending render commands. Blocks the game thread; may only be called from the game thread.
extern RENDERCORE_API void FlushRenderingCommands(bool bFlushDeferredDeletes = false);

extern RENDERCORE_API void FlushPendingDeleteRHIResources_GameThread();
extern RENDERCORE_API void FlushPendingDeleteRHIResources_RenderThread();

extern RENDERCORE_API void TickRenderingTickables();

extern RENDERCORE_API void StartRenderCommandFenceBundler();
extern RENDERCORE_API void StopRenderCommandFenceBundler();

(......)

RenderingThread.h还有一个非常重要的宏ENQUEUE_RENDER_COMMAND,它的作用是向渲染线程入队渲染指令。下面是它的声明和实现:

// Enqueues a render command for the rendering thread; Type is the name of the render operation.
// Expands to a local struct Type##Name exposing the name as char/TCHAR strings, followed by
// EnqueueUniqueRenderCommand<Type##Name>, so the caller appends the (lambda) argument list.
#define ENQUEUE_RENDER_COMMAND(Type) \
    struct Type##Name \
    {  \
        static const char* CStr() { return #Type; } \
        static const TCHAR* TStr() { return TEXT(#Type); } \
    }; \
    EnqueueUniqueRenderCommand<Type##Name>

上面最后一句使用了EnqueueUniqueRenderCommand命令,继续追踪之:

/* UnrealEngine\Engine\Source\Runtime\RenderCore\Public\RenderingThread.h */

/** The parent class of commands stored in the rendering command queue. */
class RENDERCORE_API FRenderCommand
{
public:
    // All render commands run on the render thread
    static ENamedThreads::Type GetDesiredThread() // every render command must execute on the render thread slot
    {
        // While threaded rendering is on, the render-thread slot must not be the game thread.
        check(!GIsThreadedRendering || ENamedThreads::GetRenderThread() != ENamedThreads::GameThread);
        return ENamedThreads::GetRenderThread(); // the current render-thread slot: the real rendering thread while it runs, GameThread otherwise (RenderingThreadMain sets and restores it)
    }

    // Render commands are fire-and-forget: no other task may depend on them.
    static ESubsequentsMode::Type GetSubsequentsMode()
    {
        // Don't support tasks having dependencies on us, reduces task graph overhead tracking and dealing with subsequents
        return ESubsequentsMode::FireAndForget;
    }
};

// Task type wrapping one render command: stores the callback and, when the task runs,
// invokes it with the immediate RHI command list.
// TSTR supplies the command name (TStr()); LAMBDA is the callback type.
template<typename TSTR, typename LAMBDA>
class TEnqueueUniqueRenderCommandType : public FRenderCommand
{
public:
    // Takes the callback by forwarding reference and stores it in the task.
    TEnqueueUniqueRenderCommandType(LAMBDA&& InLambda) : Lambda(Forward<LAMBDA>(InLambda)) {}

    // Task entry point: opens a profiler scope named after the command, then runs the
    // stored callback. Neither parameter is used in the body.
    void DoTask(ENamedThreads::Type CurrentThread, const FGraphEventRef& MyCompletionGraphEvent)
    {
        TRACE_CPUPROFILER_EVENT_SCOPE_ON_CHANNEL_STR(TSTR::TStr(), RenderCommandsChannel);
        FRHICommandListImmediate& RHICmdList = GetImmediateCommandList_ForRenderCommand();
        Lambda(RHICmdList);
    }

    // Stat id for this task; a default TStatId when STATS is disabled.
    FORCEINLINE_DEBUGGABLE TStatId GetStatId() const
    {
#if STATS
        static struct FThreadSafeStaticStat<FStat_EnqueueUniqueRenderCommandType> StatPtr_EnqueueUniqueRenderCommandType;
        return StatPtr_EnqueueUniqueRenderCommandType.GetStatId();
#else
        return TStatId();
#endif
    }

private:
    LAMBDA Lambda; // the stored render callback
};

/*************************************************************************************************************/

// Runs or enqueues one render command.
// TSTR is a struct type exposing static CStr()/TStr() that name the command; LAMBDA is the callback,
// invoked with the immediate RHI command list.
// Three outcomes:
//   1. caller is the rendering thread        -> the callback runs right here;
//   2. ShouldExecuteOnRenderThread() is true -> a TGraphTask wrapping the callback is dispatched;
//   3. otherwise                             -> a temporary command object is built and run in place.
template<typename TSTR, typename LAMBDA>
FORCEINLINE_DEBUGGABLE void EnqueueUniqueRenderCommand(LAMBDA&& Lambda)
{
    QUICK_SCOPE_CYCLE_COUNTER(STAT_EnqueueUniqueRenderCommand);
    using FCommandTask = TEnqueueUniqueRenderCommandType<TSTR, LAMBDA>;

#if 0 // UE_SERVER && UE_BUILD_DEBUG
    UE_LOG(LogRHI, Warning, TEXT("Render command '%s' is being executed on a dedicated server."), TSTR::TStr())
#endif

    // Case 1: already on the rendering thread, so no queuing is needed.
    if (IsInRenderingThread())
    {
        FRHICommandListImmediate& ImmediateCmdList = GetImmediateCommandList_ForRenderCommand();
        Lambda(ImmediateCmdList);
        return;
    }

    // Case 3: not routed to a render thread; build the command and execute it inline.
    if (!ShouldExecuteOnRenderThread())
    {
        FCommandTask InlineCommand(Forward<LAMBDA>(Lambda));
        FScopeCycleCounter InlineCommandScope(InlineCommand.GetStatId());
        InlineCommand.DoTask(ENamedThreads::GameThread, FGraphEventRef());
        return;
    }

    // Case 2: hand the callback to the task graph; it becomes the constructor argument of the task.
    // (Per the original annotation, ShouldExecuteOnRenderThread() amounts to
    // GIsThreadedRendering || !IsInGameThread() -- confirm against the engine source.)
    CheckNotBlockedOnRenderThread();
    TGraphTask<FCommandTask>::CreateTask().ConstructAndDispatchWhenReady(Forward<LAMBDA>(Lambda));
}

为了更好理解入队渲染命令操作,举2个具体的例子:

例1:在GameThread执行LoadMap切地图,在卸载掉Old World之后,会在TrimMemory()函数中使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph队列中投递一个FlushCommand任务

ENQUEUE_RENDER_COMMAND(FlushCommand)(
/* ---------------------------------------------- After ENQUEUE_RENDER_COMMAND macro expansion ------------------------------------------------
struct FlushCommandName
{
    static const char* CStr() { return "FlushCommand"; }
    static const TCHAR* TStr() { return L"FlushCommand"; }
};
EnqueueUniqueRenderCommand<FlushCommandName>( */
    // Callback body: flush the immediate command list, flush RHI resources, then flush again.
    [](FRHICommandList& RHICmdList)
    {
        GRHICommandList.GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::FlushRHIThreadFlushResources);
        RHIFlushResources();
        GRHICommandList.GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::FlushRHIThreadFlushResources);
    });

例2:在GameThread中执行控制台变量命令,会使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph队列中投递一个OnCVarChange1任务,以便将新的数值传递到RenderThread的逻辑中使用

// Applies a new console-variable value through a render command instead of writing Dest directly.
// Dest: the variable to update; NewValue: the value to store.
virtual void OnCVarChange(int32& Dest, int32 NewValue)
{
    // Capture the address, not the reference, so the lambda can write to Dest when it eventually runs.
    int32* DestPtr = &Dest;
    ENQUEUE_RENDER_COMMAND(OnCVarChange1)(
    /* ---------------------------------------------- After ENQUEUE_RENDER_COMMAND macro expansion ------------------------------------------------
    struct OnCVarChange1Name
    {
        static const char* CStr() { return "OnCVarChange1"; }
        static const TCHAR* TStr() { return L"OnCVarChange1"; }
    };
    EnqueueUniqueRenderCommand<OnCVarChange1Name>( */
        [DestPtr, NewValue](FRHICommandListImmediate& RHICmdList)
        {
            *DestPtr = NewValue;
        });
}

FRenderingThread承载了渲染线程的主要工作,它的部分接口和实现代码如下:

// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp

// Runnable that hosts the rendering thread's work: Init() records the thread id and
// Run() enters RenderingThreadMain(). Excerpt; elided parts are marked (......).
class FRenderingThread : public FRunnable
{
private:
    bool bAcquiredThreadOwnership;    // set in Init() when RHI is not running in a separate thread and RHIAcquireThreadOwnership() was called

public:
    FEvent* TaskGraphBoundSyncEvent; // sync event triggered by RenderingThreadMain once the thread is attached to the TaskGraph, so the main thread does not use the rendering thread before that

    FRenderingThread()
    {
        bAcquiredThreadOwnership = false;
        // Get a sync event from the pool.
        TaskGraphBoundSyncEvent    = FPlatformProcess::GetSynchEventFromPool(true);
        RHIFlushResources();
    }

    // FRunnable interface.
    virtual bool Init(void) override
    {
        // Store the current thread id in the global GRenderThreadId so other code can refer to it.
        GRenderThreadId = FPlatformTLS::GetCurrentThreadId();
        
        // Without a separate RHI thread, this thread acquires RHI thread ownership itself.
        if (!IsRunningRHIInSeparateThread())
        {
            bAcquiredThreadOwnership = true;
            RHIAcquireThreadOwnership();
        }

        return true; 
    }
    
    (......)
    
    virtual uint32 Run(void) override
    {
        // Set up TLS caches for this thread.
        FMemory::SetupTLSCachesOnCurrentThread();
        // Platform-specific rendering thread setup.
        FPlatformProcess::SetupRenderThread();

        (......)
        
        {
            // Enter the rendering thread main loop.
            RenderingThreadMain( TaskGraphBoundSyncEvent );
        }
        
        // Tear down the TLS caches once the main loop has returned.
        FMemory::ClearAndDisableTLSCachesOnCurrentThread();
        return 0;
    }
};

可见它在运行之后会进入渲染线程逻辑,这里再进入RenderingThreadMain代码一探究竟:

// Body of the rendering thread: attaches the current thread to the TaskGraph render-thread
// slot, signals the waiting thread, processes tasks until asked to return, then restores the
// render-thread slots to the game thread. Excerpt; elided parts are marked (......).
// TaskGraphBoundSyncEvent: optional event triggered once the thread is attached.
void RenderingThreadMain( FEvent* TaskGraphBoundSyncEvent )
{
    LLM_SCOPE(ELLMTag::RenderingThreadMemory);
    
    // Point the render-thread and render-thread-local named slots at ActualRenderingThread and ActualRenderingThread_Local.
    ENamedThreads::Type RenderThread = ENamedThreads::Type(ENamedThreads::ActualRenderingThread);

    ENamedThreads::SetRenderThread(RenderThread);
    ENamedThreads::SetRenderThread_Local(ENamedThreads::Type(ENamedThreads::ActualRenderingThread_Local));
    
    // Attach the current thread to the TaskGraph's RenderThread slot.
    FTaskGraphInterface::Get().AttachToThread(RenderThread);
    FPlatformMisc::MemoryBarrier();

    // Trigger the sync event: tells the main thread that the rendering thread is attached to the TaskGraph and ready to receive tasks.
    if( TaskGraphBoundSyncEvent != NULL )
    {
        TaskGraphBoundSyncEvent->Trigger();
    }

    (......)
    
    // Rendering thread lifecycle: broadcast creation, process tasks until a return is requested, broadcast destruction.
    FCoreDelegates::PostRenderingThreadCreated.Broadcast();
    check(GIsThreadedRendering);
    FTaskGraphInterface::Get().ProcessThreadUntilRequestReturn(RenderThread);
    FPlatformMisc::MemoryBarrier();
    check(!GIsThreadedRendering);
    FCoreDelegates::PreRenderingThreadDestroyed.Broadcast();
    
    (......)
    
    // Restore the render-thread slots to the game thread.
    ENamedThreads::SetRenderThread(ENamedThreads::GameThread);
    ENamedThreads::SetRenderThread_Local(ENamedThreads::GameThread_Local);
    FPlatformMisc::MemoryBarrier();
}

不过这里还留有一个很大的疑问,那就是FRenderingThread只是获取当前线程作为渲染线程并附加到TaskGraph中,并没有创建线程。

那么是哪里创建的渲染线程呢?继续追踪,结果发现是在StartRenderingThread()接口中创建了FRenderingThread实例,它的实现代码如下(节选):

// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp

// Excerpt showing where the rendering thread is actually created: the FRenderingThread
// runnable is instantiated and handed to FRunnableThread::Create.
void StartRenderingThread()
{
    (......)

    // Turn on the threaded rendering flag.
    GIsThreadedRendering = true;

    // Create the FRenderingThread runnable.
    GRenderingThreadRunnable = new FRenderingThread();

    // Create the actual rendering thread that runs it.
    GRenderingThread = FRunnableThread::Create(GRenderingThreadRunnable, *BuildRenderingThreadName(ThreadCount), 0, FPlatformAffinity::GetRenderingThreadPriority(), FPlatformAffinity::GetRenderingThreadMask(), FPlatformAffinity::GetRenderingThreadFlags());
    
    (......)

    // Issue a render command fence and wait on it.
    FRenderCommandFence Fence;
    Fence.BeginFence();
    Fence.Wait();

    (......)
}

如果继续追踪,会发现StartRenderingThread()是在FEngineLoop::PreInitPostStartupScreen中调用的。

至此,渲染线程的创建、初始化以及主要接口的实现都剖析完了。

RHI线程(RHIThread)

RenderThread作为前端(frontend)产生的Command List是平台无关的,是抽象的图形API调用;

而RHIThread作为后端(backend)会执行和转换渲染线程的Command List成为指定图形API的调用(称为Graphical Command),并提交到GPU执行。

RHI线程的工作是转换渲染指令到指定图形API,创建、上传渲染资源到GPU。实现代码如下:

// Engine\Source\Runtime\RenderCore\Private\RenderingThread.cpp

// Runnable for the RHI thread. Unlike FRenderingThread it creates its own FRunnableThread
// in Start(); Run() attaches that thread to the TaskGraph's RHIThread slot and processes tasks.
class FRHIThread : public FRunnable
{
public:
    FRunnableThread* Thread;    // the RHI thread created by Start(); nullptr until then

    // Must be constructed on the game thread.
    FRHIThread()
        : Thread(nullptr)
    {
        check(IsInGameThread());
    }
    
    void Start()
    {
        // Create the RHI thread (512 KB stack) with the platform's RHI priority, affinity mask and flags.
        Thread = FRunnableThread::Create(this, TEXT("RHIThread"), 512 * 1024, FPlatformAffinity::GetRHIThreadPriority(),
            FPlatformAffinity::GetRHIThreadMask(), FPlatformAffinity::GetRHIThreadFlags()
            );
        check(Thread);
    }

    virtual uint32 Run() override
    {
        LLM_SCOPE(ELLMTag::RHIMisc);
        
        // Set up TLS caches for this thread.
        FMemory::SetupTLSCachesOnCurrentThread();
        // Attach the RHI thread to the TaskGraph under ENamedThreads::RHIThread.
        FTaskGraphInterface::Get().AttachToThread(ENamedThreads::RHIThread);
        // Process RHI thread tasks until a return is requested.
        FTaskGraphInterface::Get().ProcessThreadUntilRequestReturn(ENamedThreads::RHIThread);
        // Tear down the TLS caches.
        FMemory::ClearAndDisableTLSCachesOnCurrentThread();
        return 0;
    }
    
    // Singleton accessor.
    static FRHIThread& Get()
    {
        static FRHIThread Singleton; // function-local static, so the instance is created on first use
        return Singleton;
    }
};

可见RHI线程不同于渲染线程,是直接在FRHIThread对象内创建实际的线程。而FRHIThread的创建也是在StartRenderingThread()中:

// Excerpt showing the RHI thread part of StartRenderingThread(): when the RHI thread is
// enabled, FRHIThread is started (if not already processing tasks) and the RHI-thread globals are filled in.
void StartRenderingThread()
{
    (......)

    if (GUseRHIThread_InternalUseOnly)
    {
        FRHICommandListExecutor::GetImmediateCommandList().ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);        
        if (!FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::RHIThread))
        {
            // Get the FRHIThread singleton and start its thread.
            FRHIThread::Get().Start();
        }
        DECLARE_CYCLE_STAT(TEXT("Wait For RHIThread"), STAT_WaitForRHIThread, STATGROUP_TaskGraphTasks);
        
        // Dispatch an FOwnershipOfRHIThreadTask and keep its completion event so the caller can wait on it.
        FGraphEventRef CompletionEvent = TGraphTask<FOwnershipOfRHIThreadTask>::CreateTask(NULL, ENamedThreads::GameThread).ConstructAndDispatchWhenReady(true, GET_STATID(STAT_WaitForRHIThread));
        QUICK_SCOPE_CYCLE_COUNTER(STAT_StartRenderingThread);
        // Wait on GameThread_Local until that task completes (per the article: the RHI thread acquiring thread ownership, a no-op for most graphics APIs).
        FTaskGraphInterface::Get().WaitUntilTaskCompletes(CompletionEvent, ENamedThreads::GameThread_Local);
        // Record the RHI thread object, the "RHI runs in its own thread" flags and the RHI thread id.
        GRHIThread_InternalUseOnly = FRHIThread::Get().Thread;
        check(GRHIThread_InternalUseOnly);
        GIsRunningRHIInDedicatedThread_InternalUseOnly = true;
        GIsRunningRHIInSeparateThread_InternalUseOnly = true;
        GRHIThreadId = GRHIThread_InternalUseOnly->GetThreadID();
        
        GRHICommandList.LatchBypass();
    }
    
    (......)
}

以Fortnite(堡垒之夜)移动端为例,在开启RHI线程之前,渲染线程急剧地上下波动,而加了RHI线程之后,波动平缓许多,和游戏线程基本保持一致,帧率也提升不少:

GameThread、RenderThread、RHIThread之间的同步机制

这3个线程处理的数据通常是不同帧的,譬如GameThread处理N帧数据,RenderThread和RHIThread处理N-1帧数据。

但也存在例外,比如RenderThread和RHIThread运行很快,几乎不存在延迟,这种情况下,GameThread处理N帧,而RenderThread可能处理N或N-1帧,RHIThread也可能在转换N或N-1帧。

但是,RenderThread不能落后游戏线程超过一帧,否则GameThread会卡住,直到RenderThread处理完所有指令。

游戏线程和渲染线程的同步

游戏线程不可能领先于渲染线程超过一帧(最多快一帧),否则游戏线程会等待渲染线程处理完。它们的同步机制涉及两个关键的概念:

// Engine\Source\Runtime\RenderCore\Public\RenderCommandFence.h

// 渲染命令栅栏
// Render command fence: lets a thread mark a point in the render command stream and later wait until it has been reached.
class RENDERCORE_API FRenderCommandFence
{
public:
    // Adds a fence to the render command queue. bSyncToRHIAndGPU: also sync with the RHI thread / GPU buffer swap; otherwise only the rendering thread is waited on.
    void BeginFence(bool bSyncToRHIAndGPU = false); 

    // Waits for the fence to be reached. The article states bProcessGameThreadTasks has no effect; the visible Wait() simply forwards it to GameThreadWaitForTask -- confirm.
    void Wait(bool bProcessGameThreadTasks = false) const;

    // Whether the fence has completed.
    bool IsFenceComplete() const;

private:
    mutable FGraphEventRef CompletionEvent; // event that signals completion of the fence; mutable because IsFenceComplete() (const) resets it
    ENamedThreads::Type TriggerThreadIndex; // the named thread expected to trigger CompletionEvent
};

// Engine\Source\Runtime\Engine\Public\UnrealEngine.h
// End-of-frame synchronization between the game thread and the rendering thread,
// implemented with a pair of render command fences. Excerpt; elided parts are marked (......).
class FFrameEndSync
{
    FRenderCommandFence Fence[2]; // pair of render fences
    int32 EventIndex; // index of the fence in use; no initializer is visible here (the only instance shown, in FEngineLoop::Tick, is a static)
public:
    // Syncs the game thread with the rendering thread. bAllowOneFrameThreadLag: whether the rendering thread may lag one frame behind.
    void Sync( bool bAllowOneFrameThreadLag )
    {
        Fence[EventIndex].BeginFence(true); // begin the current fence, requesting sync with RHI/GPU (bSyncToRHIAndGPU = true)

        bool bEmptyGameThreadTasks = !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread);
        
        // If the game thread is not already processing tasks, drain its task queue until idle.
        if (bEmptyGameThreadTasks)
        {
            FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread);
        }

        // When lag is allowed, flip to the other fence, so the wait below targets it rather than the fence just begun.
        if( bAllowOneFrameThreadLag )
        {
            EventIndex = (EventIndex + 1) % 2;
        }

        (......)
        
        // Wait on the selected fence.
        Fence[EventIndex].Wait(bEmptyGameThreadTasks);
    }
};

在FRenderCommandFence的BeginFence函数中

当GameThread与RHI线程及GPU同步时,GameThread会使用宏ENQUEUE_RENDER_COMMAND向RenderThread的TaskGraph队列中投递一个FSyncFrameCommand任务,以便将Command List同步投递到RHI线程

当GameThread与RenderThread同步时,GameThread会创建一个FNullGraphTask空任务,放到RenderThread的TaskGraph队列中让其执行

在FRenderCommandFence的Wait函数中,会检查投递给RenderThread的CompletionEvent是否被执行,如果没有执行则调用GameThreadWaitForTask函数来阻塞等待(通过Event实现)

// Arms the fence by creating/dispatching the work that will signal CompletionEvent.
// bSyncToRHIAndGPU: request sync with the RHI thread / GPU; downgraded to a render-thread-only
// sync when r.GTSyncType or r.VSync is 0. Does nothing when threaded rendering is off.
void FRenderCommandFence::BeginFence(bool bSyncToRHIAndGPU)
{
    if (!GIsThreadedRendering)
    {
        return;
    }
    else
    {
        // Render thread is a default trigger for the CompletionEvent
        TriggerThreadIndex = ENamedThreads::ActualRenderingThread;
                
        // On the game thread, reuse the bundled completion event when one exists.
        if (BundledCompletionEvent.GetReference() && IsInGameThread())
        {
            CompletionEvent = BundledCompletionEvent;
            return;
        }

        int32 GTSyncType = CVarGTSyncType.GetValueOnAnyThread();
        if (bSyncToRHIAndGPU)
        {
            // Don't sync to the RHI and GPU if GtSyncType is disabled, or we're not vsyncing
            //@TODO: do this logic in the caller?
            static auto CVarVsync = IConsoleManager::Get().FindConsoleVariable(TEXT("r.VSync")); // the r.VSync console variable
            check(CVarVsync != nullptr);

            if ( GTSyncType == 0 || CVarVsync->GetInt() == 0 ) // when r.GTSyncType or r.VSync is 0, the game thread does not sync with the RHI thread / GPU
        {
                bSyncToRHIAndGPU = false;
            }
        }


        if (bSyncToRHIAndGPU) // game thread syncs with the RHI thread and GPU
        {
            if (IsRHIThreadRunning())
            {
                // Change trigger thread to RHI
                TriggerThreadIndex = ENamedThreads::RHIThread;
            }
            
            // Create a task graph event which we can pass to the render or RHI threads.
            CompletionEvent = FGraphEvent::CreateGraphEvent();

            FGraphEventRef InCompletionEvent = CompletionEvent;
            /* ---------------------------------------------- After ENQUEUE_RENDER_COMMAND macro expansion ------------------------------------------------
            struct FSyncFrameCommandName
            {
            static const char* CStr() { return "FSyncFrameCommand"; }
            static const TCHAR* TStr() { return L"FSyncFrameCommand"; }
            };
            EnqueueUniqueRenderCommand<FSyncFrameCommandName>( */
            ENQUEUE_RENDER_COMMAND(FSyncFrameCommand)(
                [InCompletionEvent, GTSyncType](FRHICommandListImmediate& RHICmdList)
                {
                    if (IsRHIThreadRunning()) // an RHI thread is running
                    {
                        ALLOC_COMMAND_CL(RHICmdList, FRHISyncFrameCommand)(InCompletionEvent, GTSyncType); // allocate an FRHISyncFrameCommand carrying the completion event on the command list
                        RHICmdList.ImmediateFlush(EImmediateFlushType::DispatchToRHIThread);
                    }
                    else  // no RHI thread: execute the sync command directly here
                    {
                        FRHISyncFrameCommand Command(InCompletionEvent, GTSyncType);
                        Command.Execute(RHICmdList);
                    }
                });
        }
        else  // game thread syncs with the rendering thread only
        {
            // Sync Game Thread with Render Thread only
            DECLARE_CYCLE_STAT(TEXT("FNullGraphTask.FenceRenderCommand"),
            STAT_FNullGraphTask_FenceRenderCommand,
                STATGROUP_TaskGraphTasks);

            // An empty FNullGraphTask targeted at the render thread; its completion event becomes the fence.
            CompletionEvent = TGraphTask<FNullGraphTask>::CreateTask(NULL, ENamedThreads::GameThread).ConstructAndDispatchWhenReady(
                GET_STATID(STAT_FNullGraphTask_FenceRenderCommand), ENamedThreads::GetRenderThread());
        }
    }
}


/**
 * Waits for pending fence commands to retire.
 */
// Blocks until the fence's completion event has fired; returns immediately when the
// fence is already complete. bProcessGameThreadTasks is forwarded to GameThreadWaitForTask.
void FRenderCommandFence::Wait(bool bProcessGameThreadTasks) const
{
    // Nothing to wait for once the fence has been signalled.
    if (IsFenceComplete())
    {
        return;
    }

    // Stop the fence bundler before blocking on the completion event.
    StopRenderCommandFenceBundler();
    GameThreadWaitForTask(CompletionEvent, TriggerThreadIndex, bProcessGameThreadTasks);
}

// Reports whether the fence has been reached. Always true when threaded rendering is off.
// A null CompletionEvent counts as complete; a completed event is released on the way out.
bool FRenderCommandFence::IsFenceComplete() const
{
    if (!GIsThreadedRendering)
    {
        return true;
    }

    check(IsInGameThread() || IsInAsyncLoadingThread());
    CheckRenderingThreadHealth();

    // Still pending only when an event exists and it has not completed yet.
    const bool bFenceStillPending = CompletionEvent.GetReference() && !CompletionEvent->IsComplete();
    if (bFenceStillPending)
    {
        return false;
    }

    CompletionEvent = NULL; // this frees the handle for other uses, the NULL state is considered completed
    return true;
}

FFrameEndSync的使用是在FEngineLoop::Tick中:

// Engine\Source\Runtime\Launch\Private\LaunchEngineLoop.cpp

// Excerpt of the per-frame engine tick showing where FFrameEndSync is used.
void FEngineLoop::Tick()
{
    (......)
    
    // At the end of the engine loop's frame, sync the game thread with the rendering thread.
    {
        static FFrameEndSync FrameEndSync; // function-local static, shared across all Tick() calls
        static auto CVarAllowOneFrameThreadLag = IConsoleManager::Get().FindTConsoleVariableDataInt(TEXT("r.OneFrameThreadLag"));
        // Whether one frame of lag is allowed is controlled by the r.OneFrameThreadLag console variable (enabled by default, per the article).
        FrameEndSync.Sync( CVarAllowOneFrameThreadLag->GetValueOnGameThread() != 0 );
    }
    
    (......)
}

FlushRenderingCommands

在游戏线程中调用,会阻塞游戏线程,强行等待所有的渲染线程pending render command以及RHI线程中的指令执行完,相当于一次完整地对渲染线程的同步

/**
 * Waits for the rendering thread to finish executing all pending rendering commands.  Should only be used from the game thread.
 */
// Enqueues an RHI flush command, then issues a render command fence and blocks until it completes.
// bFlushDeferredDeletes: selects the flush type that also flushes deferred deletes.
// Returns immediately when the RHI is not initialized.
void FlushRenderingCommands(bool bFlushDeferredDeletes)
{
    if (!GIsRHIInitialized)
    {
        return;
    }
    FSuspendRenderingTickables SuspendRenderingTickables;

    // Need to flush GT because render commands from threads other than GT are sent to
    // the main queue of GT when RT is disabled
    if (!GIsThreadedRendering
        && !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread)
        && !FTaskGraphInterface::Get().IsThreadProcessingTasks(ENamedThreads::GameThread_Local))
    {
        FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread);
        FTaskGraphInterface::Get().ProcessThreadUntilIdle(ENamedThreads::GameThread_Local);
    }

    // Render command that flushes the immediate command list (optionally including deferred deletes).
    ENQUEUE_RENDER_COMMAND(FlushPendingDeleteRHIResourcesCmd)(
        [bFlushDeferredDeletes](FRHICommandListImmediate& RHICmdList)
    {
        RHICmdList.ImmediateFlush(
            bFlushDeferredDeletes ?
            EImmediateFlushType::FlushRHIThreadFlushResourcesFlushDeferredDeletes :
            EImmediateFlushType::FlushRHIThreadFlushResources);
    });

    // Find the objects which may be cleaned up once the rendering thread command queue has been flushed.
    FPendingCleanupObjects* PendingCleanupObjects = GetPendingCleanupObjects();

    // Issue a fence command to the rendering thread and wait for it to complete.
    FRenderCommandFence Fence;
    Fence.BeginFence(); // default bSyncToRHIAndGPU = false: normally the render-thread-only path (an FNullGraphTask targeted at the render thread)
    Fence.Wait(); // if the fence's CompletionEvent has not fired yet, block until it does

    // Delete the objects which were enqueued for deferred cleanup before the command queue flush.
    delete PendingCleanupObjects;
}

渲染线程和RHI线程的同步

RenderThread每次在调用RenderViewFamily_RenderThread的起始处,会阻塞等待所有RHI指令处理完成,然后才开始当前帧的渲染逻辑。

FMobileSceneRenderer渲染管线下,RenderThread每一帧都会执行ImmediateFlush,阻塞等待RHI处理完FGraphEventRef RHIThreadTask任务,代码如下:

FDeferredShadingSceneRenderer渲染管线下,RenderThread每一帧都会执行ImmediateFlush,阻塞等待RHI处理完FGraphEventRef RHIThreadTask任务,代码如下:

阻塞时的stats栈(移动端)

参考

剖析虚幻渲染体系(02)- 多线程渲染

UE4主线程与渲染线程同步

原文地址:https://www.cnblogs.com/kekec/p/15464958.html