Maven dependencies (add to pom.xml):
<!-- Apache Tika artifacts used by the TestTika example; both are pinned to the same version. -->
<dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-core</artifactId>
    <version>1.14</version>
</dependency>
<dependency>
    <groupId>org.apache.tika</groupId>
    <artifactId>tika-parsers</artifactId>
    <version>1.14</version>
</dependency>
package com.alibaba.middleware.hsf;import org.apache.tika.Tika;import org.apache.tika.exception.TikaException;import org.apache.tika.metadata.Metadata;import org.apache.tika.parser.AutoDetectParser;import org.apache.tika.sax.BodyContentHandler;import org.xml.sax.SAXException;import java.io.*;import java.net.ContentHandler;/*** Created by xiaoming.linxm on 2018/10/24.*/public class TestTika {public static void main(String[] argv) throws IOException, TikaException {Tika tika=new Tika();//System.out.println(tika.parseToString(new URL("http://www.taobao.com")));// System.out.println(tika.parseToString(new File("TikaSample.class")));String[] tt=new String[]{"d:/test.doc"};for (String file : tt) {System.out.println(file);System.out.println(tika.detect(new File(file)));String text = tika.parseToString(new File(file));System.out.print(text);}}}
