字幕文件处理(2)

摘要

上一篇文章我们实现了整数与时间格式的互转。常见的字幕文件格式有 WebVTT、SRT 和 TTML:有的系统要求我们提供 VTT 格式,有的系统只支持 TTML 格式,而字幕制作完成后我们拿到的可能是 SRT 格式,所以会涉及到不同格式字幕文件之间的转换。

本文介绍的示例代码实现了 VTT 与 SRT 的互转,也可以将 VTT 或 SRT 转换为 TTML。

同样, 匹配时间格式的正则表达式是: 

"([0-9]+:)?([0-9]+):([0-9]+)([.|,][0-9]+)? --> ([0-9]+:)?([0-9]+):([0-9]+)([.|,][0-9]+)?"

字幕格式对象是: 

    /// <summary>
    /// A single caption cue: its start time, end time and the caption text.
    /// </summary>
    class ClosedCaption
    {
        /// <summary>Start time of the cue, stored as already-formatted text.</summary>
        public string StartPoint { get; set; }

        /// <summary>End time of the cue, stored as already-formatted text.</summary>
        public string EndPoint { get; set; }

        /// <summary>Caption text shown between the start and end times.</summary>
        public string Transcript { get; set; }

        /// <summary>
        /// Renders the cue as a timing line ("start --> end") followed by the
        /// transcript line; both lines are terminated by a line break.
        /// </summary>
        public override string ToString()
        {
            return new StringBuilder()
                .AppendFormat("{0} --> {1}", StartPoint, EndPoint)
                .AppendLine()
                .AppendLine(Transcript)
                .ToString();
        }
    }

从文件中读取字幕格式对象: 

        /// <summary>
        /// Reads a subtitle file and rebuilds the <c>Captions</c> collection from it.
        /// </summary>
        /// <remarks>
        /// Every timing line ("start --> end") found in the file becomes one
        /// <see cref="ClosedCaption"/>. Both times are normalised through
        /// <c>TimeFormat.ToDouble</c> / <c>TimeFormat.ToHHMMSS(..., "t1")</c>.
        /// The transcript of a cue is the text between its timing line and the next
        /// timing line (or the end of the file for the last cue).
        /// </remarks>
        /// <param name="filePath">Path of the subtitle file to read.</param>
        public static void ReadTranscript(string filePath)
        {
            // Matches a timing line such as "0:0:4.480 --> 0:0:7.430".
            // Note: "[.|,]" is a character class, so besides '.' and ',' it also accepts a literal '|'.
            string timePattern = @"([0-9]+:)?([0-9]+):([0-9]+)([.|,][0-9]+)? --> ([0-9]+:)?([0-9]+):([0-9]+)([.|,][0-9]+)?";

            string fileContent;
            // The file is only read, so request read access only (read-only files can then be opened),
            // and dispose the reader together with the stream.
            using (var stream = new FileStream(filePath, FileMode.Open, FileAccess.Read))
            using (var reader = new StreamReader(stream))
            {
                fileContent = reader.ReadToEnd();
            }

            MatchCollection cues = Regex.Matches(fileContent, timePattern, RegexOptions.IgnoreCase);
            Captions = new List<ClosedCaption>();

            for (int i = 0; i < cues.Count; i++)
            {
                Match cue = cues[i];
                string[] timeInfo = cue.Value.Split(new string[] { "-->" }, StringSplitOptions.RemoveEmptyEntries);
                if (timeInfo.Length != 2)
                {
                    continue;
                }

                string startInfo = TimeFormat.ToHHMMSS(TimeFormat.ToDouble(timeInfo[0].Trim()), "t1");
                string endInfo = TimeFormat.ToHHMMSS(TimeFormat.ToDouble(timeInfo[1].Trim()), "t1");

                // Slice the transcript by match position instead of splitting on a "-->"
                // placeholder: this keeps cues and transcripts aligned even when the file has
                // no header, a cue has no text, or the caption text itself contains "-->".
                int textStart = cue.Index + cue.Length;
                bool hasNextCue = i + 1 < cues.Count;
                int textEnd = hasNextCue ? cues[i + 1].Index : fileContent.Length;
                string transcript = fileContent
                    .Substring(textStart, textEnd - textStart)
                    .Trim(new char[] { '\r', '\n' });

                // Only text that is followed by another cue can end with that cue's
                // sequence number; the last cue is left untouched.
                if (hasNextCue)
                {
                    transcript = StripTrailingCueNumber(transcript);
                }

                Captions.Add(new ClosedCaption
                {
                    StartPoint = startInfo,
                    EndPoint = endInfo,
                    Transcript = transcript.Trim()
                });
            }
        }

        /// <summary>
        /// Removes a trailing run of ASCII digits when it sits on a line of its own,
        /// i.e. the sequence number that precedes the next cue in SRT-style files.
        /// Digits that end a text line (e.g. "in 2016") or make up the whole text are kept.
        /// </summary>
        private static string StripTrailingCueNumber(string text)
        {
            int digitsStart = text.Length;
            while (digitsStart > 0 && text[digitsStart - 1] >= '0' && text[digitsStart - 1] <= '9')
            {
                digitsStart--;
            }

            bool hasDigits = digitsStart < text.Length;
            bool onOwnLine = digitsStart > 0
                && (text[digitsStart - 1] == '\n' || text[digitsStart - 1] == '\r');

            if (hasDigits && onOwnLine)
            {
                return text.Substring(0, digitsStart);
            }

            return text;
        }


由字幕对象生成VTT, SRT, 和TTML:

        /// <summary>
        /// Writes the current <c>Captions</c> to a WebVTT file. Nothing is written
        /// when there are no captions.
        /// </summary>
        /// <param name="vtt">Path of the VTT file to create or overwrite.</param>
        public static void Write2VTT(string vtt)
        {
            if (Captions.Count <= 0)
            {
                return;
            }

            var output = new StringBuilder();

            // File header followed by one blank line.
            output.AppendLine("WEBVTT");
            output.AppendLine();

            foreach (var caption in Captions)
            {
                // The caption text already ends with a line break, so AppendLine
                // adds the blank line that separates two cues.
                output.AppendLine(caption.ToString());
            }

            // Disposing the writer flushes and closes the file.
            using (var writer = new StreamWriter(vtt, false))
            {
                writer.Write(output.ToString());
            }
        }

        /// <summary>
        /// Writes the current <c>Captions</c> to an SRT file, numbering the cues
        /// from 1. Nothing is written when there are no captions.
        /// </summary>
        /// <param name="srt">Path of the SRT file to create or overwrite.</param>
        public static void Write2SRT(string srt)
        {
            if (Captions.Count <= 0)
            {
                return;
            }

            var output = new StringBuilder();
            int sequence = 0;
            while (sequence < Captions.Count)
            {
                var caption = Captions[sequence];
                sequence++;

                // Sequence number line, then the cue; the cue text already ends with a
                // line break, so AppendLine adds the blank separator line.
                output.AppendLine(sequence.ToString());
                output.AppendLine(caption.ToString());
            }

            // Disposing the writer flushes and closes the file.
            using (var writer = new StreamWriter(srt))
            {
                writer.Write(output.ToString());
            }
        }

        /// <summary>
        /// Writes the current <c>Captions</c> to a TTML file. Nothing is read or
        /// written when there are no captions.
        /// </summary>
        /// <remarks>
        /// The document skeleton is loaded from the template file "ttSample1.txt"
        /// (resolved relative to the current directory). The generated
        /// <c>&lt;div&gt;</c> element is inserted into the template through
        /// <c>string.Format</c>, i.e. at the template's <c>{0}</c> placeholder.
        /// </remarks>
        /// <param name="ttml">Path of the TTML file to create or overwrite.</param>
        public static void Write2TTML(string ttml)
        {
            if (Captions.Count == 0)
            {
                return;
            }

            string template;
            using (StreamReader sr = new StreamReader("ttSample1.txt"))
            {
                template = sr.ReadToEnd();
            }

            StringBuilder sbContent = new StringBuilder();
            // Quotes inside the markup must be escaped, otherwise the literal ends early.
            sbContent.AppendLine("<div region=\"subtitleArea\">");
            for (int i = 0; i < Captions.Count; i++)
            {
                // Round-trip through seconds so both times are emitted in the "t1" format.
                double beginTime = TimeFormat.ToDouble(Captions[i].StartPoint);
                double endTime = TimeFormat.ToDouble(Captions[i].EndPoint);

                string begin = TimeFormat.ToHHMMSS(beginTime, "t1");
                string end = TimeFormat.ToHHMMSS(endTime, "t1");

                // Encode the caption text so characters such as '<' and '&' cannot break the markup.
                string content = HttpUtility.HtmlEncode(Captions[i].Transcript);
                sbContent.AppendLine(string.Format("<p begin=\"{1}\" id=\"{0}\" end=\"{2}\">{3}</p>", "p" + i, begin, end, content));
            }
            sbContent.AppendLine("</div>");

            string document = string.Format(template, sbContent.ToString());

            // Disposing the writer flushes and closes the file.
            using (StreamWriter writer = new StreamWriter(ttml))
            {
                writer.Write(document);
            }
        }

转换示例:见 GitHub 上的 CCConverter 项目

转载请注明出处http://www.cnblogs.com/qixue/p/5498396.html

原文地址:https://www.cnblogs.com/qixue/p/5498396.html