Maven dependencies (pom.xml):
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>1.14</version>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>1.14</version>
</dependency>
package com.alibaba.middleware.hsf;
import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.SAXException;
import java.io.*;
import java.net.ContentHandler;
/**
 * Small command-line demo of the Apache Tika facade: for each input file it
 * prints the path, the media type reported by {@code Tika.detect}, and the
 * text returned by {@code Tika.parseToString}.
 *
 * <p>Usage: {@code java com.alibaba.middleware.hsf.TestTika [file ...]}. When no
 * paths are given, the built-in sample path {@code d:/test.doc} is used.
 *
 * <p>Created by xiaoming.linxm on 2018/10/24.
 */
public class TestTika {

    /** Sample document processed when no paths are passed on the command line. */
    private static final String DEFAULT_FILE = "d:/test.doc";

    /**
     * Runs detection and text extraction over every requested file, in order.
     *
     * @param argv paths of the files to inspect; if empty (or {@code null}),
     *             {@link #DEFAULT_FILE} is processed instead
     * @throws IOException   if a file cannot be read; processing stops at the
     *                       first failing file
     * @throws TikaException if Tika fails to parse a file
     */
    public static void main(String[] argv) throws IOException, TikaException {
        Tika tika = new Tika();

        // Alternative inputs tried earlier, kept for reference:
        //System.out.println(tika.parseToString(new URL("http://www.taobao.com")));
        // System.out.println(tika.parseToString(new File("TikaSample.class")));

        // Prefer caller-supplied paths; fall back to the sample so that running
        // without arguments behaves exactly as it did before.
        String[] paths = (argv != null && argv.length > 0)
                ? argv
                : new String[] {DEFAULT_FILE};

        for (String path : paths) {
            // One File instance serves both the detect and the parse call.
            File input = new File(path);
            System.out.println(path);
            System.out.println(tika.detect(input));
            String text = tika.parseToString(input);
            // print (not println): the extracted text is emitted verbatim.
            System.out.print(text);
        }
    }
}