WebM是Google提出的新的网络视频格式,本质上是个MKV的壳,封装VPX中的VP8视频流与Vorbis OGG音频流。目前Firefox、Opera、Chrome都能直接打开WebM视频文件而无需其他任何乱七八糟的插件。我个人倒是很喜欢WebM的OGG音频,虽然在低比特率下不如AAC,不过依旧胜过MP3太多了。
最近接手了一个项目,将Showcase中的Flash视频导出替换为WebM视频导出,着实蛋疼了一把,因为ffmpeg这个破玩意的最新二进制版本虽然集成了VPX,不过由于许可证等等原因,商业软件不好直接使用。一气之下我直接用Google提供的WebM SDK搞定从序列帧到视频的输出,完全摆脱ffmpeg。
对于WebM SDK,我找到了三个问题:
- 依旧没有内建RGB24到YV12的转换,不得不手动来。
- SDK提供的simple_encoder产生出的IVF依旧无法播放。
- 如果构造了一个YV12格式的vpx_image_t对象,这个对象无法重复使用,产生的视频有错。
下面是我的WebMEnc编码器主文件的代码,不明白WebM SDK如何使用的朋友可以学习一下。JPEG、TIFF、PNG的读取使用了FreeImage。
完整的代码在Ortholab的SVN里有。
可执行二进制程序可以在这里下载。
// Copyright (c) 2011 Bo Zhou<Bo.Schwarzstein@gmail.com> // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include <stdio.h> #include <stdlib.h> #include <FreeImage.h> #include <vpx/vpx_codec.h> #include <vpx/vpx_encoder.h> #include <vpx/vpx_image.h> #include <vpx/vpx_version.h> #include <vpx/vp8cx.h> #include "EbmlWriter.h" #define rgbtoy(b, g, r, y) \ y=(unsigned char)(((int)(30*r) + (int)(59*g) + (int)(11*b))/100) #define rgbtoyuv(b, g, r, y, u, v) \ rgbtoy(b, g, r, y); \ u=(unsigned char)(((int)(-17*r) - (int)(33*g) + (int)(50*b)+12800)/100); \ v=(unsigned char)(((int)(50*r) - (int)(42*g) - (int)(8*b)+12800)/100) #if defined(_MSC_VER) /* MSVS doesn't define off_t, and uses _f{seek,tell}i64 */ #define fseeko _fseeki64 #define ftello _ftelli64 #endif void rgb2YUV420P(vpx_image_t *rgbImage, vpx_image_t *yv12Image) { unsigned int width = rgbImage->w; unsigned int height = rgbImage->h; unsigned int planeSize = width * height; unsigned int halfWidth = width >> 1; unsigned char* yPlane = yv12Image->img_data; unsigned char* uPlane = yPlane + planeSize; unsigned char* vPlane = uPlane + (planeSize >> 2); static const int rgbIncrement = 3; unsigned char* rgb = rgbImage->img_data; for (unsigned int y = 0; y < height; ++ y) { unsigned char* yLine = yPlane + (y * width); unsigned char* uLine = uPlane + ((y >> 1) * halfWidth); unsigned char* vLine = vPlane + ((y >> 1) * halfWidth); for (unsigned int x = 0; x < width; x += 2) { rgbtoyuv(rgb[2], rgb[1], rgb[0], *yLine, *uLine, *vLine); 
rgb += rgbIncrement; yLine++; rgbtoyuv(rgb[2], rgb[1], rgb[0], *yLine, *uLine, *vLine); rgb += rgbIncrement; yLine++; uLine++; vLine++; } } } bool readImage(char *filename, int frameNumber, vpx_image_t **pRGBImage, vpx_image_t **pYV12Image) { // Load image. // char path[512]; sprintf(path, filename, frameNumber); FREE_IMAGE_FORMAT format = FIF_UNKNOWN; format = FreeImage_GetFIFFromFilename(filename); if ( (format == FIF_UNKNOWN) || ((format != FIF_JPEG) && (format != FIF_TIFF) && (format != FIF_PNG)) ) { return false; } FIBITMAP* dib = FreeImage_Load(format, path); if (dib == NULL) { return false; } unsigned w = FreeImage_GetWidth(dib); unsigned h = FreeImage_GetHeight(dib); if (*pRGBImage == NULL) { *pRGBImage = vpx_img_alloc(NULL, VPX_IMG_FMT_RGB24, w, h, 1); } if (*pYV12Image == NULL) { *pYV12Image = vpx_img_alloc(NULL, VPX_IMG_FMT_YV12, w, h, 1); } memcpy((*pRGBImage)->img_data, FreeImage_GetBits(dib), w * h * 3); rgb2YUV420P(*pRGBImage, *pYV12Image); vpx_img_flip(*pYV12Image); FreeImage_Unload(dib); return true; } int main(int argc, char* argv[]) { if (argc != 4) { printf(" Usage: WebMEnc <filename> <bit-rates> <output file>\nExample: WebMEnc frame.%%.5d.jpg 512 frame.webm\n"); return EXIT_FAILURE; } #ifdef FREEIMAGE_LIB FreeImage_Initialise(); #endif // Initialize VPX codec. // vpx_codec_ctx_t vpxContext; vpx_codec_enc_cfg_t vpxConfig; if (vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &vpxConfig, 0) != VPX_CODEC_OK) { return EXIT_FAILURE; } // Try to load the first frame to initialize width and height. // vpx_image_t *rgbImage = NULL, *yv12Image = NULL; if (readImage(argv[1], 0, &rgbImage, &yv12Image) == false) { return EXIT_FAILURE; } vpxConfig.g_h = yv12Image->h; vpxConfig.g_w = yv12Image->w; vpxConfig.rc_target_bitrate = atoi(argv[2]); vpxConfig.g_threads = 2; // Prepare the output .webm file. 
// EbmlGlobal ebml; memset(&ebml, 0, sizeof(EbmlGlobal)); ebml.last_pts_ms = -1; ebml.stream = fopen(argv[3], "wb"); if (ebml.stream == NULL) { return EXIT_FAILURE; } vpx_rational ebmlFPS = vpxConfig.g_timebase; struct vpx_rational arg_framerate = {30, 1}; Ebml_WriteWebMFileHeader(&ebml, &vpxConfig, &arg_framerate); if (vpx_codec_enc_init(&vpxContext, vpx_codec_vp8_cx(), &vpxConfig, 0) != VPX_CODEC_OK) { return EXIT_FAILURE; } // Reading image file sequence, encoding to .WebM file. // int frameNumber = 0; while(readImage(argv[1], frameNumber, &rgbImage, &yv12Image)) { vpx_codec_err_t vpxError = vpx_codec_encode(&vpxContext, yv12Image, frameNumber, 33, 0, 0); if (vpxError != VPX_CODEC_OK) { return EXIT_FAILURE; } vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *packet; while( (packet = vpx_codec_get_cx_data(&vpxContext, &iter)) ) { Ebml_WriteWebMBlock(&ebml, &vpxConfig, packet); } frameNumber ++; printf("Processed %d frames.\r", frameNumber); vpx_img_free(yv12Image); yv12Image = NULL; } Ebml_WriteWebMFileFooter(&ebml, 0); fclose(ebml.stream); vpx_codec_destroy(&vpxContext); #ifdef FREEIMAGE_LIB FreeImage_DeInitialise(); #endif return EXIT_SUCCESS; }