java实现word文件转html(图片用base64转化)
1.添加需要的jar包:
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.xdocreport.document</artifactId>
    <version>2.0.1</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.15</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.15</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>fr.opensagres.poi.xwpf.converter.xhtml</artifactId>
    <version>2.0.1</version>
</dependency>
2.来一个小demo吧。
对于该demo,描述几个我觉得需要注意的点:
2.1:不知道有没有小伙伴发生了jar包冲突的现象呢,可以考虑修改一下jar包版本号哦,基本上应该没什么问题呢;
2.2:word文档的后缀有.doc和.docx两种,二者的转换方法并不相同。所以,对于不同的文档,我们需要先判断其后缀是什么,才能进行下一步操作;
2.3:此demo,我选择通过接口直接返回动态的html,当然,如果想生成一个静态的html,可以自己修改输出方式;
2.4:对于文档中涉及到的图片如何转化的问题,暂时选择将图片用base64编码后直接内嵌到html中;
2.5:最后:此demo中测试转化的文档,目前只测试了简单的文本加图片,所以可能有别的问题待发现并解决。
/**
 * Endpoint that converts a Word document to HTML.
 *
 * <p>The work is delegated entirely to {@code demoService.convertWordToHtml}, which
 * receives the servlet response so it can produce the output itself; this method
 * returns nothing.
 *
 * @param id                  identifier of the Word document to convert (mandatory request parameter)
 * @param httpServletResponse servlet response passed through to the service
 * @throws Exception if the service call fails
 */
@ApiOperation("将word转成html")
@GetMapping("/convertWordToHtml")
public void convertWordToHtml(
        @RequestParam String id,
        HttpServletResponse httpServletResponse) throws Exception {
    demoService.convertWordToHtml(id, httpServletResponse);
}
// Some unimportant code is omitted here; all that is needed is to obtain the document to convert as an InputStream.
// NOTE(review): the null below looks like a placeholder left by the omitted code — it must be
// replaced with the real document stream before either converter is called.
InputStream inputStream = null;
// The converters write their HTML into the servlet response's output stream.
OutputStream outputStream = httpServletResponse.getOutputStream();
/**
 * Converts a .docx document to HTML, embedding pictures as Base64 data.
 *
 * <p>The document is read from the {@code inputStream} variable declared outside this
 * method. That stream and the parsed document are always released, even when the
 * conversion fails. The output stream is flushed and closed only after a successful
 * conversion, so on failure the caller still owns it.
 *
 * @param outputStream destination for the generated HTML
 * @throws Exception if the document cannot be read or converted
 */
public static void convertDocxFileToHtml(OutputStream outputStream) throws Exception {
    // try-with-resources guarantees the source stream and the document are closed on every path;
    // a null resource is tolerated by the language and simply skipped on close.
    try (InputStream source = inputStream;
         XWPFDocument document = new XWPFDocument(source)) {
        XHTMLOptions options = XHTMLOptions.create();
        // NOTE(review): presumably keeps style definitions even when nothing references them — confirm.
        options.setIgnoreStylesIfUnused(false);
        // NOTE(review): presumably emits only a fragment rather than a full HTML page — confirm.
        options.setFragment(true);
        // Pictures are handled by the Base64 embedding image manager instead of being written to disk.
        options.setImageManager(new Base64EmbedImgManager());

        // Convert to HTML.
        XHTMLConverter.getInstance().convert(document, outputStream, options);
        outputStream.flush();
        outputStream.close();
    }
}
/**
 * Converts a .doc document to HTML, embedding pictures as Base64 data URIs.
 *
 * <p>The document is read from the {@code inputStream} variable declared outside this
 * method; that stream is always closed when the method exits. The output stream is
 * flushed after the HTML has been written but is left open for the caller.
 *
 * @param outputStream destination for the generated HTML
 * @throws Exception if the document cannot be loaded, converted or serialized
 */
public static void convertDocFileToHtml(OutputStream outputStream) throws Exception {
    // try-with-resources closes the source stream on every path; a null resource is skipped.
    try (InputStream source = inputStream) {
        // PS: if the stream is non-null but loading the document fails here,
        // check whether the file was actually saved by Office Word.
        HWPFDocument wordDocument = (HWPFDocument) WordToHtmlUtils.loadDoc(source);
        WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument()
        );

        // Embed each picture as a data URI. The MIME type comes from the picture type reported
        // by POI, so JPEG/GIF/EMF pictures are no longer mislabelled as PNG; "image/png" is
        // kept only as a fallback when no type is supplied.
        PicturesManager pictureRunMapper = (bytes, pictureType, suggestedName, widthInches, heightInches) -> {
            String mimeType = pictureType == null ? "image/png" : pictureType.getMime();
            return "data:" + mimeType + ";base64," + Base64.encodeBase64String(bytes);
        };
        wordToHtmlConverter.setPicturesManager(pictureRunMapper);

        // Parse the Word document into the converter's DOM.
        wordToHtmlConverter.processDocument(wordDocument);
        Document htmlDocument = wordToHtmlConverter.getDocument();

        // Serialize the DOM as indented UTF-8 HTML into the output stream.
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outputStream));
        outputStream.flush();
    }
}