数据从哪里来?可以从数据库获取、从消息队列中获取,也可以通过爬虫爬取。
爬取数据:(获取请求返回的页面信息,筛选出我们想要的数据就可以了)
jsoup包
1.导入依赖
<!--解析网页--><dependency><groupId>org.jsoup</groupId><artifactId>jsoup</artifactId><version>1.10.2</version></dependency>
2.编写解析工具类
@Componentpublic class HtmlParseUtil {public static void main(String[] args) throws IOException {praseJD("java");}public static List<Content> praseJD(String keywords) throws IOException {//获取请求 https://search.jd.com/Search?keyword=java//前提,需要联网,String url = "https://search.jd.com/Search?keyword="+keywords;//解析网页(jsoup返回的Document就是浏览器的Document对象)Document document = Jsoup.parse(new URL(url), 3000);//所有在js中的方法这里都可以使用Element element = document.getElementById("J_goodsList");System.out.println(element.html());//获取所有的li元素Elements elements = element.getElementsByTag("li");ArrayList<Content> goodList = new ArrayList<>();//获取元素里面的内容for (Element el:elements){String img = el.getElementsByTag("img").eq(0).attr("data-lazy-img");String price = el.getElementsByClass("p-price").eq(0).text();String title = el.getElementsByClass("p-name").eq(0).text();Content content = new Content();content.setTitle(title);content.setPrice(price);content.setImg(img);goodList.add(content);}return goodList;}}
