package pwc.taxtech.atms.common; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.sax.BodyContentHandler; import org.xml.sax.ContentHandler; import java.io.*; public class TikaTest { public static void main(String[] args) { try { // Tika tika = new Tika(); File xpsFile = new File("C:\\woo\\海关稽核结果.xps"); InputStream inputStream = new FileInputStream(xpsFile); // String FileName = xpsFile.getName(); // Metadata metadata = new Metadata(); // if (FileName != null && FileName.length() > 0) // metadata.add(Metadata.RESOURCE_NAME_KEY, FileName); // String MimeType = tika.detect(inputStream, metadata); // // metadata.add(Metadata.CONTENT_TYPE, MimeType); // inputStream.close(); // inputStream = new FileInputStream(xpsFile); // Reader reader = tika.parse(inputStream, metadata); // String content = IOUtils.toString(reader); // inputStream.close(); Metadata metadata = new Metadata(); ContentHandler handler = new BodyContentHandler(); new TmpXPSParser().parse(inputStream, handler, metadata, new ParseContext()); String content = handler.toString(); System.out.println(content); } catch (Exception e) { e.printStackTrace(); } } }