将代码DownloadSomeData.java放在教程的Java Project中,代码如下:
package com.alibaba.alink;import org.apache.commons.io.FileUtils;import javax.net.ssl.HttpsURLConnection;import javax.net.ssl.SSLContext;import java.io.File;import java.net.URL;/*** 本代码只能下载部分章节的数据。* 全部数据链接地址:https://www.yuque.com/pinshu/alink_tutorial/book_java_reference*/public class DownloadSomeData {public static void main(String[] args) throws Exception {downloadUrl("http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",Chap03.LOCAL_DIR);downloadUrl("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv",Chap03.LOCAL_DIR);downloadUrl("http://files.grouplens.org/datasets/movielens/ml-100k/u.data",Chap03.LOCAL_DIR);downloadUrl("http://archive.ics.uci.edu/ml/machine-learning-databases/00267/data_banknote_authentication.txt",Chap08.DATA_DIR);downloadUrl("http://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",Chap09.DATA_DIR);downloadUrl("http://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data",Chap10.DATA_DIR);downloadUrl("http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/action_log.csv",Chap11.DATA_DIR);downloadUrl("http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data",Chap12.DATA_DIR);downloadUrl("http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz",Chap13.DATA_DIR);downloadUrl("http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz",Chap13.DATA_DIR);downloadUrl("http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz",Chap13.DATA_DIR);downloadUrl("http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz",Chap13.DATA_DIR);downloadUrl("http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv",Chap16.DATA_DIR);for (String fileName : new String[]{Chap24.ITEM_FILE, Chap24.USER_FILE,Chap24.RATING_FILE, Chap24.RATING_TRAIN_FILE, Chap24.RATING_TEST_FILE}) {downloadUrl("http://files.grouplens.org/datasets/movielens/ml-100k/" + fileName,Chap24.DATA_DIR);}downloadUrlHttps("https://raw.githubusercontent.com/tennessine/corpus/master/%E4%B8%89%E5%9B%BD%E6%BC%94%E4%B9%89.txt",Chap22.DATA_DIR);downloadUrlHttps("https://github.com/BenDerPan/toutiao-text-classfication-dataset/raw/master/toutiao_cat_data.txt.zip",Chap21.DATA_DIR);}static synchronized void downloadUrl(String url, String dirPath) {try {URL httpUrl = new URL(url);File dir = new File(dirPath);if (!dir.exists()) {dir.mkdirs();}String fileName = url.substring(url.lastIndexOf("/") + 1);FileUtils.copyURLToFile(httpUrl, new File(dir, fileName));// FileUtils.copyURLToFile(httpUrl, new File(dir, fileName), 30000, 10000);System.out.println("Success @ " + url);} catch (Exception e) {System.err.println("Failed @ " + url);System.err.println(e.toString());}}static synchronized void downloadUrlHttps(String url, String dirPath) {try {URL httpsUrl = new URL(url);File dir = new File(dirPath);if (!dir.exists()) {dir.mkdirs();}String str = java.net.URLDecoder.decode(url, "UTF-8");String fileName = str.substring(str.lastIndexOf("/") + 1);SSLContext context = SSLContext.getInstance("TLS");context.init(null, null, null);HttpsURLConnection.setDefaultSSLSocketFactory(context.getSocketFactory());HttpsURLConnection connection = (HttpsURLConnection) httpsUrl.openConnection();FileUtils.copyURLToFile(connection.getURL(), new File(dir, fileName), 60000, 50000);System.out.println("Success @ " + url);} catch (Exception e) {System.err.println("Failed @ " + url);System.err.println(e.toString());}}}
在Java Project中的位置及运行结果,如下面截图所示:
