NetCore中带图片的word转html (NPOI.Word2Html)

NetCore的word转html很少见,特别是带图片转换为html的

查了很多资料做了些尝试

DocX 的免费版已经不再支持 .NET Core 了。

后来在nuget上查找发现DotNetCore.NPOI可以引用(后来发现NPOI其实就可以了)

然后就开始尝试

获取图片这一步有点坑,踩了好一会儿,找了很久才找到图片与正文之间的对应关系(通过关系 Id 关联)。另外还有一个已知问题:文档中两张完全相同的图片只会显示一张。

github地址如下 如果有帮助 麻烦帮忙Star一下谢谢

https://github.com/ToolsByXLG/NPOI.Word2Html

首先nuget引用 NPOI

Install-Package NPOI

理论上,如果不是 .NET Core 项目,直接引用 NPOI 就好。

可能不太完善 可以稍微改进下

反正可以正常在centos上word转成html了

// Open the source .docx read-only, e.g. to pass it to NpoiDocHelper.NpoiDoc.
FileStream stream = File.OpenRead("1.docx");

代码已更新,下面就是我项目中实际使用的类;上传阿里云的部分请根据自己的情况修改。

using NPOI.OpenXmlFormats.Wordprocessing;
using NPOI.XWPF.UserModel;
  /// <summary>
  ///     Converts a .docx document (read through NPOI's XWPF model) into an HTML fragment.
  ///     Paragraphs become &lt;p&gt; elements, tables become &lt;table&gt; elements and inline
  ///     pictures become &lt;img&gt; elements whose source is either an uploaded URL or a
  ///     base64 data URI.
  /// </summary>
  public class NpoiDocHelper 
    {
        private readonly IBinaryObjectManager _binaryObjectManager;


        public NpoiDocHelper(IBinaryObjectManager binaryObjectManager)
        {
            _binaryObjectManager = binaryObjectManager;
        }

        /// <summary>
        ///     Reads a .docx document from <paramref name="stream" /> and renders its body
        ///     elements (paragraphs and tables, in document order) as HTML.
        /// </summary>
        /// <param name="stream">Stream positioned at the start of a .docx file.</param>
        /// <param name="isImgUploadAliYun">
        ///     When true, pictures are uploaded through the injected binary object manager and
        ///     referenced by URL; otherwise (or when the upload fails) they are embedded as
        ///     base64 data URIs.
        /// </param>
        /// <returns>The generated HTML fragment with empty style attributes stripped.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="stream" /> is null.</exception>
        public async Task<string> NpoiDoc(Stream stream, bool isImgUploadAliYun = false)
        {
            if (stream == null)
            {
                throw new ArgumentNullException(nameof(stream));
            }

            // XWPFDocument only understands the OOXML (.docx) format.
            var myDocx = new XWPFDocument(stream);

            var picInfoList = await PicturesHandleAsync(myDocx, isImgUploadAliYun);

            var sb = new StringBuilder();

            foreach (var element in myDocx.BodyElements)
            {
                switch (element.ElementType)
                {
                    case BodyElementType.PARAGRAPH:
                        sb.Append(ParaGraphHandle((XWPFParagraph) element, picInfoList));
                        break;

                    case BodyElementType.TABLE:
                        sb.Append(TableHandle((XWPFTable) element, picInfoList));
                        break;
                }
            }

            // The handlers always open a style attribute; drop the ones that stayed empty.
            return sb.Replace(" style=''", "").ToString();
        }

        /// <summary>
        ///     Collects every picture stored in the document and resolves a usable
        ///     <c>src</c> value for each of them.
        /// </summary>
        /// <param name="myDocx">The opened document.</param>
        /// <param name="isImgUploadAliYun">Whether pictures should be uploaded instead of inlined.</param>
        /// <returns>One <see cref="PicInfo" /> per picture, keyed by its package relationship id.</returns>
        public async Task<List<PicInfo>> PicturesHandleAsync(XWPFDocument myDocx, bool isImgUploadAliYun = false)
        {
            var picInfoList = new List<PicInfo>();

            foreach (var picture in myDocx.AllPictures)
            {
                var pData = picture.Data;
                var picInfo = new PicInfo
                {
                    // The relationship id is what the drawing XML references via r:embed.
                    Id = picture.GetPackageRelationship().Id,
                    PicType = picture.GetPackagePart().ContentType
                };

                if (isImgUploadAliYun)
                {
                    try
                    {
                        // Upload hook: replace with another storage call if needed.
                        picInfo.Url = await _binaryObjectManager.SaveAsync(new BinaryObject
                            {Bytes = pData, FileName = picture.FileName, FileType = picInfo.PicType});
                    }
                    catch (Exception)
                    {
                        // Deliberate best-effort: a failed upload must not fail the whole
                        // conversion, the picture is embedded as base64 below instead.
                    }
                }

                // Fallback (and default when uploading is disabled): inline data URI.
                if (string.IsNullOrWhiteSpace(picInfo.Url))
                {
                    picInfo.Url = $"data:{picInfo.PicType};base64,{Convert.ToBase64String(pData)}";
                }

                picInfoList.Add(picInfo);
            }

            return picInfoList;
        }

        /// <summary>
        ///     Renders a Word table as an HTML table. Each cell's paragraphs are rendered with
        ///     <see cref="ParaGraphHandle" />.
        /// </summary>
        /// <param name="paraTable">The table to render.</param>
        /// <param name="picInfoList">Resolved pictures, used for images inside cells.</param>
        /// <returns>The HTML for the table.</returns>
        public StringBuilder TableHandle(XWPFTable paraTable, List<PicInfo> picInfoList)
        {
            var sb = new StringBuilder();

            sb.Append("<table border='1' cellspacing='0'>");
            foreach (var row in paraTable.Rows)
            {
                // No row-level styling yet; the empty attribute is stripped by NpoiDoc.
                sb.Append("<tr style=''>");

                foreach (var cell in row.GetTableCells())
                {
                    // tcPr (cell properties) is optional, so every access is null-safe.
                    var tcPr = cell.GetCTTc().tcPr;

                    sb.Append("<td style='");

                    // Only absolute widths (type dxa = twentieths of a point) are translated;
                    // 20 twips == 1pt. Percentage / auto widths are left to the browser.
                    var tcW = tcPr?.tcW;
                    double twips;
                    if (tcW != null
                        && tcW.type == ST_TblWidth.dxa
                        && double.TryParse(tcW.w, System.Globalization.NumberStyles.Float,
                            System.Globalization.CultureInfo.InvariantCulture, out twips)
                        && twips > 0)
                    {
                        var points = (twips / 20d).ToString("0.##",
                            System.Globalization.CultureInfo.InvariantCulture);
                        sb.Append($"width:{points}pt;");
                    }

                    // Shading fill may be "auto", which is not a usable CSS colour.
                    var fill = tcPr?.shd?.fill;
                    if (IsHexColor(fill))
                    {
                        sb.Append($"background-color: #{fill};");
                    }

                    sb.Append("'>");

                    foreach (var cellParagraph in cell.Paragraphs)
                    {
                        sb.Append(ParaGraphHandle(cellParagraph, picInfoList));
                    }

                    sb.Append("</td>");
                }

                sb.Append("</tr>");
            }

            sb.Append("</table>");
            return sb;
        }

        /// <summary>
        ///     Renders the text nodes of a run as &lt;span&gt; elements carrying the run's
        ///     colour, highlight, italic, bold, size and font as inline CSS.
        /// </summary>
        /// <param name="ctr">The run's underlying XML object.</param>
        /// <returns>The HTML for the run's text; empty when the run has no text nodes.</returns>
        public StringBuilder FontHandle(CT_R ctr)
        {
            var sb = new StringBuilder();
            var rPr = ctr.rPr;

            // The run properties apply to every text node of the run, so build the style once.
            var style = new StringBuilder();

            // Colour may be "auto"; only emit real RRGGBB values.
            var color = rPr?.color?.val;
            if (IsHexColor(color))
            {
                style.Append($"color:#{color};");
            }

            if (rPr?.highlight != null)
            {
                var highlight = rPr.highlight.val.ToString();
                // "none" means no highlight and is not a valid CSS colour.
                if (!string.IsNullOrWhiteSpace(highlight)
                    && !string.Equals(highlight, "none", StringComparison.OrdinalIgnoreCase))
                {
                    style.Append($"background-color: {highlight};");
                }
            }

            if (rPr?.i?.val == true)
            {
                style.Append("font-style:italic;");
            }

            if (rPr?.b?.val == true)
            {
                style.Append("font-weight:bold;");
            }

            if (rPr?.sz != null)
            {
                // w:sz is expressed in half-points, so halve it and emit points.
                var points = Convert.ToDouble(rPr.sz.val, System.Globalization.CultureInfo.InvariantCulture) / 2d;
                style.Append(
                    $"font-size:{points.ToString("0.#", System.Globalization.CultureInfo.InvariantCulture)}pt;");
            }

            var fontName = rPr?.rFonts?.ascii;
            if (!string.IsNullOrWhiteSpace(fontName))
            {
                // Encoded so a quote in the font name cannot terminate the attribute.
                style.Append($"font-family:{System.Net.WebUtility.HtmlEncode(fontName)};");
            }

            foreach (var text in ctr.GetTList())
            {
                sb.Append("<span style='").Append(style).Append("'>");
                // Document text is untrusted for HTML purposes: encode <, >, & and quotes.
                sb.Append(System.Net.WebUtility.HtmlEncode(text.Value));
                sb.Append("</span>");
            }

            return sb;
        }

        /// <summary>
        ///     Renders the inline drawings of a run as &lt;img&gt; elements by matching the
        ///     <c>r:embed</c> relationship id in the drawing XML against
        ///     <paramref name="picInfoList" />.
        /// </summary>
        /// <param name="ctr">The run's underlying XML object.</param>
        /// <param name="picInfoList">Resolved pictures from <see cref="PicturesHandleAsync" />.</param>
        /// <returns>The HTML for the run's pictures; empty when there are none.</returns>
        public StringBuilder DrawingHandle(CT_R ctr, List<PicInfo> picInfoList)
        {
            var sb = new StringBuilder();

            foreach (var drawing in ctr.GetDrawingList())
            {
                var inlines = drawing.GetInlineList();
                if (inlines == null)
                {
                    continue;
                }

                foreach (var inline in inlines)
                {
                    var fragments = inline.graphic?.graphicData?.Any;
                    if (fragments == null)
                    {
                        continue;
                    }

                    foreach (var xml in fragments)
                    {
                        // The closing quote is part of the needle so "rId1" does not match "rId10".
                        var picture = picInfoList.FirstOrDefault(x =>
                            xml.IndexOf("a:blip r:embed=\"" + x.Id + "\"", StringComparison.Ordinal) > -1);

                        if (picture != null && !string.IsNullOrWhiteSpace(picture.Url))
                        {
                            sb.Append($"<img src='{System.Net.WebUtility.HtmlEncode(picture.Url)}' />");
                        }
                    }
                }
            }

            return sb;
        }

        /// <summary>
        ///     Builds the opening &lt;p&gt; tag for a paragraph, including alignment and
        ///     first-line indentation when they are set.
        /// </summary>
        /// <param name="paragraph">The paragraph being rendered.</param>
        /// <returns>The opening tag (the caller appends the content and the closing tag).</returns>
        public StringBuilder TagPHandle(XWPFParagraph paragraph)
        {
            var sb = new StringBuilder();
            sb.Append("<p style='");

            try
            {
                // FontAlignment is the numeric paragraph alignment: 1 = left, 2 = center,
                // 3 = right, 4 = both (justified). Left/unknown need no style at all.
                string textAlign = null;
                switch (paragraph.FontAlignment)
                {
                    case 2:
                        textAlign = "center";
                        break;
                    case 3:
                        textAlign = "right";
                        break;
                    case 4:
                        textAlign = "justify";
                        break;
                }

                if (textAlign != null)
                {
                    sb.Append($"text-align:{textAlign};");
                }

                // Indentation is in twips; 240 twips (12pt) is treated as one em.
                var em = paragraph.IndentationFirstLine / 240;
                if (em > 0)
                {
                    sb.Append($"text-indent:{em}em;");
                }
            }
            catch (Exception)
            {
                // Deliberate best-effort: paragraph properties may be missing; in that case
                // the paragraph is emitted without (further) styling.
            }

            sb.Append("'>");
            return sb;
        }

        /// <summary>
        ///     Renders a paragraph: the opening tag from <see cref="TagPHandle" />, then for
        ///     every run its pictures followed by its text, then the closing tag.
        /// </summary>
        /// <param name="paragraph">The paragraph to render.</param>
        /// <param name="picInfoList">Resolved pictures from <see cref="PicturesHandleAsync" />.</param>
        /// <returns>The HTML for the paragraph.</returns>
        public StringBuilder ParaGraphHandle(XWPFParagraph paragraph, List<PicInfo> picInfoList)
        {
            var sb = new StringBuilder();

            sb.Append(TagPHandle(paragraph));

            foreach (var run in paragraph.Runs)
            {
                var ctr = run.GetCTR();

                // Pictures first, then text, matching the original output order.
                sb.Append(DrawingHandle(ctr, picInfoList));
                sb.Append(FontHandle(ctr));
            }

            sb.Append("</p>");
            return sb;
        }

        /// <summary>
        ///     Returns true when <paramref name="value" /> is a six-digit RRGGBB hex colour
        ///     (i.e. not null, not "auto").
        /// </summary>
        private static bool IsHexColor(string value)
        {
            if (string.IsNullOrWhiteSpace(value) || value.Length != 6)
            {
                return false;
            }

            foreach (var c in value)
            {
                if (!Uri.IsHexDigit(c))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        ///     A picture found in the document together with the value to use as its
        ///     <c>src</c>.
        /// </summary>
        public class PicInfo
        {
            /// <summary>
            ///     Package relationship id of the picture (matched against r:embed).
            /// </summary>
            public string Id { get; set; }

            /// <summary>
            ///     Content type of the picture part.
            /// </summary>
            public string PicType { get; set; }

            /// <summary>
            ///     Uploaded URL, or a base64 data URI when not uploaded.
            /// </summary>
            public string Url { get; set; }
        }
    }
原文地址:https://www.cnblogs.com/liwenyan/p/11768230.html