Jsoup+HttpClient获取新浪新闻数据

package com.test;

import java.io.IOException;
import java.net.URI;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

/**
 * Fetches the Sina rolling-news channel page with a GET request and prints
 * the status line, the response headers, the body and the reported content
 * length to standard output.
 *
 * <p>Requires Apache HttpClient 4.3 or later (the {@code org.apache.http}
 * packages), not the legacy commons-httpclient 3.x library.
 *
 * @author tianjun
 */
public class PostTest {

    /**
     * Builds the request, executes it and dumps the response.
     *
     * @param args unused
     * @throws Exception if the request URI cannot be built
     */
    public static void main(String[] args) throws Exception {
        // Target URL, as observed in the browser:
        // http://roll.news.sina.com.cn/s/channel.php?ch=01#col=91&spec=&type=&ch=01&k=&offset_page=0&offset_num=0&num=60&asc=&page=NaN
        // Equivalent plain query form built here:
        // http://roll.news.sina.com.cn/s/channel.php?col=91&spec=&type=&ch=01&offset_page=0&offset_num=0&num=60&page=1
        URI uri = new URIBuilder()
                .setScheme("http")
                .setHost("roll.news.sina.com.cn")
                .setPath("/s/channel.php")
                .setParameter("col", "91")
                .setParameter("spec", "")
                .setParameter("type", "")
                .setParameter("ch", "01")
                .setParameter("offset_page", "0")
                .setParameter("offset_num", "0")
                .setParameter("num", "60")
                .setParameter("page", "1")
                .build();

        // Create the GET request and set the request headers.
        HttpGet httpGet = new HttpGet(uri);
        httpGet.setHeader("Accept", "*/*");
        httpGet.setHeader("Accept-Encoding", "gzip, deflate");
        httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3");
        httpGet.setHeader("Connection", "keep-alive");
        httpGet.setHeader("Host", "roll.news.sina.com.cn");
        // NOTE(review): this Referer value looks truncated (only a query string) - confirm the intended URL.
        httpGet.setHeader("Referer", "?ch=01");
        httpGet.setHeader("User-Agent",
                "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0");
        httpGet.setHeader("Content-Type", "text/html;charset=UTF-8");

        System.out.println("executing request " + httpGet.getURI());

        // try-with-resources closes the response first and then the client,
        // releasing the connection even when reading the body fails.
        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(httpGet)) {

            // Status line returned by the server.
            System.out.println(response.getStatusLine());

            // Print every response header.
            for (Header header : response.getAllHeaders()) {
                System.out.println(header.getName() + ":" + header.getValue());
            }

            // Response message entity (may be absent).
            HttpEntity entity = response.getEntity();
            System.out.println("————————————");
            if (entity != null) {
                // GBK is only the fallback: a charset declared by the server in
                // Content-Type takes precedence. This replaces the lossy
                // decode-as-ISO-8859-1-then-re-decode round trip.
                System.out.println(EntityUtils.toString(entity, "GBK"));
                System.out.println("—————————————-");
                // Length reported by the entity.
                System.out.println("响应内容长度:" + entity.getContentLength());
            }
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

做事不怕难,自无难人事。

Jsoup+HttpClient获取新浪新闻数据

相关文章:

你感兴趣的文章:

标签云: