TikaTest.java 1.44 KB
Newer Older
eddie.woo's avatar
eddie.woo committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
package pwc.taxtech.atms.common;

import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;

import java.io.*;

/**
 * Manual smoke test that runs {@code TmpXPSParser} over a single XPS file and
 * prints the text collected by a {@link BodyContentHandler} to standard output.
 *
 * <p>{@code TmpXPSParser} is not declared in this file; it is presumably a
 * sibling class in the same package (NOTE(review): confirm).
 *
 * <p>An earlier, commented-out variant of this test went through the
 * {@code Tika} facade (MIME detection followed by {@code tika.parse}); it was
 * dead code and has been removed in favour of the direct parser call below.
 */
public class TikaTest {

    /** File parsed when no path is supplied on the command line. */
    private static final String DEFAULT_XPS_PATH = "C:\\woo\\海关稽核结果.xps";

    /**
     * Parses one XPS file and prints its extracted body text.
     *
     * @param args optional; when present, {@code args[0]} is the path of the
     *             file to parse, otherwise {@link #DEFAULT_XPS_PATH} is used
     */
    public static void main(String[] args) {
        String xpsPath = (args != null && args.length > 0) ? args[0] : DEFAULT_XPS_PATH;
        File xpsFile = new File(xpsPath);

        // try-with-resources: the original never closed this stream, leaking the
        // file handle on both the success path and when parsing threw.
        try (InputStream inputStream = new FileInputStream(xpsFile)) {
            Metadata metadata = new Metadata();
            ContentHandler handler = new BodyContentHandler();
            new TmpXPSParser().parse(inputStream, handler, metadata, new ParseContext());

            // The handler's string form is what this test reports as the content.
            String content = handler.toString();
            System.out.println(content);
        } catch (Exception e) {
            // Top-level boundary of a manual test: report the failure and exit normally.
            e.printStackTrace();
        }
    }
}