1. Main classes involved in HDFS operations
Configuration: encapsulates the configuration information of the client or the server.
FileSystem: an instance of this class represents a file system; its methods are used to operate on files. It is obtained through the static FileSystem.get method, e.g. FileSystem hdfs = FileSystem.get(conf);
FSDataInputStream: the HDFS input stream, obtained through FileSystem's open method.
FSDataOutputStream: the HDFS output stream, obtained through FileSystem's create method.
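To make the relationship between these four classes concrete, here is a minimal sketch (assuming a NameNode listening at hdfs://localhost:9000 and a hypothetical /demo/hello.txt path; the class name HdfsQuickStart is made up for this demo) that writes a file through FSDataOutputStream and reads it back through FSDataInputStream:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsQuickStart {
    public static void main(String[] args) throws Exception {
        // Configuration: client-side settings, including the default file system URI
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000"); // assumed NameNode address

        // FileSystem: obtained through the static get method, as in the example above
        FileSystem hdfs = FileSystem.get(conf);

        Path file = new Path("/demo/hello.txt"); // hypothetical path used only for this demo

        // FSDataOutputStream: returned by FileSystem.create, writes data into HDFS
        try (FSDataOutputStream out = hdfs.create(file, true)) {
            out.writeUTF("hello hdfs");
        }

        // FSDataInputStream: returned by FileSystem.open, reads data back from HDFS
        try (FSDataInputStream in = hdfs.open(file)) {
            System.out.println(in.readUTF());
        }
        hdfs.close();
    }
}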
2. Dependency configuration (pom.xml)
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<groupId>com.hdfs</groupId>
	<artifactId>HadoopTest</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>
	<name>HadoopTest</name>
	<url>http://maven.apache.org</url>
	<parent>
		<groupId>org.springframework.boot</groupId>
		<artifactId>spring-boot-starter-parent</artifactId>
		<version>2.0.0.RELEASE</version>
	</parent>
	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
		<java.version>1.8</java.version>
	</properties>
	<dependencies>
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter</artifactId>
		</dependency>
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-test</artifactId>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter-web</artifactId>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-common</artifactId>
			<version>3.1.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-hdfs</artifactId>
			<version>3.1.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-client</artifactId>
			<version>3.1.1</version>
		</dependency>
		<dependency>
			<groupId>org.apache.hadoop</groupId>
			<artifactId>hadoop-mapreduce-client-core</artifactId>
			<version>3.1.1</version>
		</dependency>
		<dependency>
			<groupId>cn.bestwu</groupId>
			<artifactId>ik-analyzers</artifactId>
			<version>5.1.0</version>
		</dependency>
		<dependency>
			<groupId>jdk.tools</groupId>
			<artifactId>jdk.tools</artifactId>
			<version>1.8</version>
			<scope>system</scope>
			<systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
		</dependency>
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<scope>test</scope>
		</dependency>
	</dependencies>
	<build>
		<plugins>
			<plugin>
				<groupId>org.springframework.boot</groupId>
				<artifactId>spring-boot-maven-plugin</artifactId>
			</plugin>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<configuration>
					<source>1.8</source>
					<target>1.8</target>
				</configuration>
			</plugin>
		</plugins>
	</build>
</project>
application.properties:
# Tomcat worker thread pool size
server.tomcat.max-threads=1000
server.port=8900
# Session timeout
server.session-timeout=60
spring.application.name=hadoop
spring.servlet.multipart.max-file-size=50MB
spring.servlet.multipart.max-request-size=50MB
hdfs.path=hdfs://localhost:9000
hdfs.username=linhaiy
logging.config=classpath:logback.xml
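As a quick sanity check of the hdfs.path and hdfs.username settings above, the following minimal sketch (the class name HdfsConnectionCheck is made up, and it assumes the NameNode really is reachable at hdfs://localhost:9000) connects as that user and lists the root directory:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsConnectionCheck {
    public static void main(String[] args) throws Exception {
        // Values taken from application.properties: hdfs.path and hdfs.username
        String hdfsPath = "hdfs://localhost:9000";
        String hdfsUser = "linhaiy";

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", hdfsPath);

        // Connect with an explicit user identity, as HdfsService.getFileSystem() does below
        FileSystem fs = FileSystem.get(new URI(hdfsPath), conf, hdfsUser);
        for (FileStatus status : fs.listStatus(new Path("/"))) {
            System.out.println(status.getPath() + "  " + (status.isDirectory() ? "dir" : "file"));
        }
        fs.close();
    }
}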
3. HDFS file operation API development
package com.hadoop.config;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Configuration;
/**
 * HDFS configuration class
 * @author linhaiy
 * @date 2019.05.18
 */
@Configuration
public class HdfsConfig {
@Value("${hdfs.path}")
private String path;
public String getPath() {
return path;
}
public void setPath(String path) {
this.path = path;
}
}
package com.hadoop.hdfs.entity;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/**
 * User entity class
 * @author linhaiy
 * @date 2019.05.18
 */
public class User implements Writable {
private String username;
private Integer age;
private String address;
public User() {
super();
// TODO Auto-generated constructor stub
}
public User(String username, Integer age, String address) {
super();
this.username = username;
this.age = age;
this.address = address;
}
@Override
public void write(DataOutput output) throws IOException {
// Serialize the object; writeUTF pairs with the readUTF calls in readFields below
output.writeUTF(username);
output.writeInt(age);
output.writeUTF(address);
}
@Override
public void readFields(DataInput input) throws IOException {
// Deserialize the object back into memory
username = input.readUTF();
age = input.readInt();
address = input.readUTF();
}
public String getUsername() {
return username;
}
public void setUsername(String username) {
this.username = username;
}
public Integer getAge() {
return age;
}
public void setAge(Integer age) {
this.age = age;
}
public String getAddress() {
return address;
}
public void setAddress(String address) {
this.address = address;
}
@Override
public String toString() {
return "User [username=" + username + ", age=" + age + ", address=" + address + "]";
}
}
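A quick way to see the Writable contract in action is to round-trip a User through a byte stream. The following sketch is not part of the original article; it uses plain JDK streams, made-up sample values, and relies on write/readFields using the matching writeUTF/readUTF calls shown above:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import com.hadoop.hdfs.entity.User;

public class UserWritableDemo {
    public static void main(String[] args) throws Exception {
        User original = new User("linhaiy", 25, "Beijing"); // sample values for the demo

        // Serialize: Writable.write targets any DataOutput, here a byte array
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        try (DataOutputStream out = new DataOutputStream(bytes)) {
            original.write(out);
        }

        // Deserialize: readFields restores the fields from the same byte sequence
        User copy = new User();
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()))) {
            copy.readFields(in);
        }
        System.out.println(copy); // User [username=linhaiy, age=25, address=Beijing]
    }
}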
package com.hadoop.hdfs.service;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.PostConstruct;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.springframework.web.multipart.MultipartFile;
import com.hadoop.util.JsonUtil;
@Component
public class HdfsService {
@Value("${hdfs.path}")
private String path;
@Value("${hdfs.username}")
private String username;
private static String hdfsPath;
private static String hdfsName;
private static final int bufferSize = 1024 * 1024 * 64;
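// Note: the static hdfsPath / hdfsName fields used below need to be populated from the
// @Value-injected instance fields once the bean is built. The original listing does not show
// that step, so the following @PostConstruct method is an assumed minimal way to do it:
@PostConstruct
public void init() {
hdfsPath = path;
hdfsName = username;
}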
/**
 * Get the HDFS configuration
 * @return
 */
private static Configuration getConfiguration() {
Configuration configuration = new Configuration();
configuration.set("fs.defaultFS", hdfsPath);
return configuration;
}
/**
 * Get the HDFS FileSystem object
 * @return
 * @throws Exception
 */
public static FileSystem getFileSystem() throws Exception {
// When a client operates on HDFS it carries a user identity; by default the HDFS client API
// takes it from a JVM parameter, e.g. -DHADOOP_USER_NAME=hadoop
// It can also be passed as an argument when constructing the client FileSystem object
FileSystem fileSystem = FileSystem.get(new URI(hdfsPath), getConfiguration(), hdfsName);
return fileSystem;
}
/**
 * Create a directory on HDFS
 * @param path
 * @return
 * @throws Exception
 */
public static boolean mkdir(String path) throws Exception {
if (StringUtils.isEmpty(path)) {
return false;
}
if (existFile(path)) {
return true;
}
FileSystem fs = getFileSystem();
// Target path
Path srcPath = new Path(path);
boolean isOk = fs.mkdirs(srcPath);
fs.close();
return isOk;
}
/**
 * Check whether a file exists on HDFS
 * @param path
 * @return
 * @throws Exception
 */
public static boolean existFile(String path) throws Exception {
if (StringUtils.isEmpty(path)) {
return false;
}
FileSystem fs = getFileSystem();
Path srcPath = new Path(path);
boolean isExists = fs.exists(srcPath);
return isExists;
}
/**
 * Read HDFS directory information
 * @param path
 * @return
 * @throws Exception
 */
public static List

