webrtc (6) 在Webrtc中集成VideoToolbox

// Build the source-image attribute dictionary passed to VTCompressionSessionCreate:
// frame width, frame height, pixel format and OpenGL ES compatibility.
CFMutableDictionaryRef source_attrs = CFDictionaryCreateMutable (NULL, 0, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
CFNumberRef number;
// NOTE(review): kCFNumberSInt16Type assumes codec_settings->width/height are
// 16-bit fields -- confirm against the declaration of codec_settings.
number = CFNumberCreate (NULL, kCFNumberSInt16Type, &codec_settings->width);
CFDictionarySetValue (source_attrs, kCVPixelBufferWidthKey, number);
CFRelease (number);
number = CFNumberCreate (NULL, kCFNumberSInt16Type, &codec_settings->height);
CFDictionarySetValue (source_attrs, kCVPixelBufferHeightKey, number);
CFRelease (number);
OSType pixelFormat = kCVPixelFormatType_420YpCbCr8Planar;
number = CFNumberCreate (NULL, kCFNumberSInt32Type, &pixelFormat);
CFDictionarySetValue (source_attrs, kCVPixelBufferPixelFormatTypeKey, number);
CFRelease (number);
CFDictionarySetValue(source_attrs, kCVPixelBufferOpenGLESCompatibilityKey, kCFBooleanTrue);
// Create the H.264 compression session; encoded frames are delivered to
// EncodedFrameCallback with `this` as the callback context.
OSStatus ret = VTCompressionSessionCreate(NULL, codec_settings->width, codec_settings->height, kCMVideoCodecType_H264, NULL, source_attrs, NULL, EncodedFrameCallback, this, &encoder_session_);
// Release the attribute dictionary on BOTH the success and the failure path;
// previously the early return below leaked it.
CFRelease(source_attrs);
if (ret != 0) {
WEBRTC_TRACE(webrtc::kTraceError, webrtc::kTraceVideoCoding, -1,
"vt_encoder::InitEncode() fails to create encoder ret_val %d",
(int)ret);
return WEBRTC_VIDEO_CODEC_ERROR;
}

2，VT编码出来的数据是AVCC格式，需要转换为Annex-B格式，才能回调给Webrtc。主要区别在于数据开头是长度字段还是startCode，具体见stackoverflow的帖子。

同理，解码时，需要将webrtc的Annex-B格式转换为AVCC格式，再送给VT解码器。

Annex-B：StartCode + Nalu1 + StartCode + Nalu2 + ...

AVCC ：Nalu1 length + Nalu1 + Nalu2 length + Nalu2 + ...

注意⚠：AVCC格式中的length字段需要是big endian顺序。length字段的长度可定制，一般为1/2/4byte，需要通过接口配置给解码器。

3，创建VideoFormatDescription

解码时需要创建VTDecompressionSession，需要一个VideoFormatDescription参数。

创建VideoFormatDescription需要首先从码流中获取到SPS和PPS，然后使用如下接口创建VideoFormatDescription

[objc] view plain copy

/*!
@function CMVideoFormatDescriptionCreateFromH264ParameterSets
@abstract Creates a format description for a video media stream described by H.264 parameter set NAL units.
@discussion This function parses the dimensions provided by the parameter sets and creates a format description suitable for a raw H.264 stream.
The parameter sets' data can come from raw NAL units and must have any emulation prevention bytes needed.
The supported NAL unit types to be included in the format description are 7 (sequence parameter set), 8 (picture parameter set) and 13 (sequence parameter set extension). At least one sequence parameter set and one picture parameter set must be provided.
*/
CM_EXPORT
OSStatus CMVideoFormatDescriptionCreateFromH264ParameterSets(
CFAllocatorRef allocator, /*! @param allocator
CFAllocator to be used when creating the CMFormatDescription. Pass NULL to use the default allocator. */
size_t parameterSetCount, /*! @param parameterSetCount
The number of parameter sets to include in the format description. This parameter must be at least 2. */
const uint8_t * const * parameterSetPointers, /*! @param parameterSetPointers
Points to a C array containing parameterSetCount pointers to parameter sets. */
const size_t * parameterSetSizes, /*! @param parameterSetSizes
Points to a C array containing the size, in bytes, of each of the parameter sets. */
int NALUnitHeaderLength, /*! @param NALUnitHeaderLength
Size, in bytes, of the NALUnitLength field in an AVC video sample or AVC parameter set sample. Pass 1, 2 or 4. */
CMFormatDescriptionRef *formatDescriptionOut ) /*! @param formatDescriptionOut
Returned newly-created video CMFormatDescription */
__OSX_AVAILABLE_STARTING(__MAC_10_9,__IPHONE_7_0);

4，判断VT编码出来的数据是否是keyframe

这个代码取自OpenWebrtc from Ericsson

[cpp] view plain copy

/*
 * Reports whether the encoded sample buffer `sbuf` is a keyframe.
 *
 * Looks at the attachment dictionary of the first sample and returns true
 * only when kCMSampleAttachmentKey_DependsOnOthers is present and equal to
 * kCFBooleanFalse. Returns FALSE when the buffer has no sample attachments
 * array, or when the key is missing or holds any other value.
 *
 * NOTE(review): index 0 is read without checking the array length; this
 * assumes a non-NULL attachments array is never empty -- confirm.
 */
static bool
vtenc_buffer_is_keyframe (CMSampleBufferRef sbuf)
{
  CFArrayRef sample_attachments = CMSampleBufferGetSampleAttachmentsArray (sbuf, 0);

  /* No attachments at all: cannot be identified as a keyframe. */
  if (sample_attachments == NULL)
    return FALSE;

  CFDictionaryRef first_sample = (CFDictionaryRef)CFArrayGetValueAtIndex (sample_attachments, 0);
  CFBooleanRef needs_other_frames = (CFBooleanRef)CFDictionaryGetValue (first_sample,
      kCMSampleAttachmentKey_DependsOnOthers);

  /* A frame that depends on no other frame is a keyframe. */
  return (needs_other_frames == kCFBooleanFalse);
}

4，SPS和PPS变化后判断VT是否还能正确解码

通过下面的接口判断是否需要更新VT

[objc] view plain copy

/*!
@function VTDecompressionSessionCanAcceptFormatDescription
@abstract Indicates whether the session can decode frames with the given format description.
@discussion
Some video decoders are able to accommodate minor changes in format without needing to be
completely reset in a new session. This function can be used to test whether a format change
is sufficiently minor.
@param session The decompression session being queried.
@param newFormatDesc The format description to test against the session.
@result Boolean indicating whether the session can decode frames with newFormatDesc.
*/
VT_EXPORT Boolean
VTDecompressionSessionCanAcceptFormatDescription(
VTDecompressionSessionRef session,
CMFormatDescriptionRef newFormatDesc ) __OSX_AVAILABLE_STARTING(__MAC_10_8,__IPHONE_8_0);

5，PTS

PTS会影响VT编码质量，一般情况下，duration参数表示每帧数据的时长，用样点数表示，一般视频采样频率为90KHz，帧率为30fps，则duration就是sampleRate / frameRate = 90K/30 = 3000.

而pts表示当前帧的显示时间，也用样点数表示，即 frame_index * sampleRate / frameRate（frame_index为当前帧的序号，例如第10帧的pts为 10 * 3000 = 30000）。

[objc] view plain copy

/* Submits one image buffer to a compression session for encoding, tagged
   with its presentation timestamp and duration. */
VT_EXPORT OSStatus
VTCompressionSessionEncodeFrame(
VTCompressionSessionRef session,
CVImageBufferRef imageBuffer,
CMTime presentationTimeStamp,
CMTime duration, // may be kCMTimeInvalid
CFDictionaryRef frameProperties, // may be NULL
void * sourceFrameRefCon,
VTEncodeInfoFlags *infoFlagsOut /* may be NULL */ ) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_8_0);

6，编码选项

[objc] view plain copy

kVTCompressionPropertyKey_AllowTemporalCompression

[objc] view plain copy

kVTCompressionPropertyKey_AllowFrameReordering

TemporalCompression控制是否产生P帧。

FrameReordering控制是否产生B帧。

7，使用自带的PixelBufferPool提高性能。

创建VTSession之后会自动创建一个PixelBufferPool，用做循环缓冲区，降低频繁申请释放内存区域造成的额外开销。

[objc] view plain copy

/* Returns the pixel buffer pool of a compression session.
   NOTE(review): the surrounding article states the pool is created
   automatically together with the session -- confirm against the
   VideoToolbox documentation. */
VT_EXPORT CVPixelBufferPoolRef
VTCompressionSessionGetPixelBufferPool(
VTCompressionSessionRef session ) __OSX_AVAILABLE_STARTING(__MAC_10_8, __IPHONE_8_0);

[objc] view plain copy

/* Obtains a pixel buffer from pixelBufferPool and hands it back through
   pixelBufferOut; the CVReturn result reports success or failure.
   NOTE(review): ownership of the returned buffer is not shown here --
   presumably the caller releases it; confirm against the CoreVideo docs. */
CV_EXPORT CVReturn CVPixelBufferPoolCreatePixelBuffer(CFAllocatorRef allocator,
CVPixelBufferPoolRef pixelBufferPool,
CVPixelBufferRef *pixelBufferOut) __OSX_AVAILABLE_STARTING(__MAC_10_4,__IPHONE_4_0);

中间还有很多很多的细节，任何一处错误都会导致千奇百怪的crash、编码或解码失败等问题。

多看看我提供的那几个链接，会很有帮助。

经过测试，iOS8 硬件编解码效果确实很好，比OpenH264出来的视频质量更清晰，并且能轻松达到30帧，码率控制的精确性也更高。