Scripts 学盟
标题:
关于 Java 的 HTTP 抓取 [GET 方式请求]
[打印本页]
作者:
Alvin
时间:
2011-4-29 22:12:14
标题:
关于 Java 的 HTTP 抓取 [GET 方式请求]
下面是一段简单的示例代码，可供参考。
/**
* (#)HttpGet.java 创建时间:Apr 29, 2011 9:36:43 PM<br />
*/
package org.iscripts.common.net;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small helper that fetches the content behind a URL and returns it as text,
 * using {@link URLConnection}.
 *
 * <p>The text encoding is either supplied by the caller or taken from the
 * {@code charset} parameter of the response's Content-Type; when neither is
 * available, UTF-8 is used.
 *
 * @author Lin Junhai / 林俊海 (ialvin.cn), Lihu, Puning, Guangdong
 */
public class HttpGet {

    /** Encoding used when the caller passes none and the response declares none. */
    private static final String DEFAULT_ENCODING = "utf-8";

    /**
     * Extracts the {@code charset} parameter from a Content-Type value.
     * The value may be a bare token ({@code charset=utf-8}) or quoted
     * ({@code charset="utf-8"}); the optional quote is skipped so that only
     * the charset name itself is captured.
     */
    private static final Pattern CHARSET_PATTERN =
            Pattern.compile("(?i)\\bcharset=[\"']?([^\\s;\"']+)");

    /**
     * Demo entry point: fetches a page as UTF-8 and prints it to standard output.
     *
     * @param args ignored
     * @throws IOException if the page cannot be fetched or read
     */
    public static void main(String[] args) throws IOException {
        System.out.println(HttpGet.get("http://www.iscripts.org/", "utf-8"));
    }

    /**
     * Fetches {@code strURL} and decodes the response using the charset declared
     * in its Content-Type, or UTF-8 when no charset is declared.
     *
     * @param strURL the URL to fetch
     * @return the complete response body as text
     * @throws IOException if the URL is malformed, the connection or read fails,
     *         or the declared charset is not supported
     */
    public static String get(String strURL) throws IOException {
        URLConnection conn = HttpGet.openURL(strURL);
        return HttpGet.read(conn.getInputStream(), HttpGet.getContentEncoding(conn));
    }

    /**
     * Fetches {@code strURL} and decodes the response with the given encoding.
     *
     * @param strURL the URL to fetch
     * @param encoding charset name used to decode the body; {@code null} means UTF-8
     * @return the complete response body as text
     * @throws IOException if the URL is malformed, the connection or read fails,
     *         or {@code encoding} is not supported
     */
    public static String get(String strURL, String encoding) throws IOException {
        return HttpGet.read(HttpGet.openURL(strURL).getInputStream(), encoding);
    }

    /**
     * Opens and connects a {@link URLConnection} for the given URL string.
     *
     * @param strURL the URL to open
     * @return the connected connection
     * @throws IOException if the URL is malformed or connecting fails
     */
    private static URLConnection openURL(String strURL) throws IOException {
        URLConnection conn = new URL(strURL).openConnection();
        conn.connect();
        return conn;
    }

    /**
     * Reads the whole stream as text and closes it.
     *
     * <p>This method takes ownership of {@code in}: the stream is closed on
     * both the success and the failure path, because the callers keep no other
     * handle through which they could close it.
     *
     * @param in the stream to drain; always closed before returning
     * @param encoding charset name; {@code null} means UTF-8
     * @return the decoded content
     * @throws IOException if reading fails or the encoding is not supported
     */
    private static String read(InputStream in, String encoding) throws IOException {
        try {
            String charsetName = (encoding == null) ? DEFAULT_ENCODING : encoding;
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, charsetName));
            char[] buffer = new char[4096];
            StringBuilder text = new StringBuilder();
            for (int len; (len = reader.read(buffer)) > -1;) {
                text.append(buffer, 0, len);
            }
            return text.toString();
        } finally {
            // Closing the underlying stream also covers the case where the
            // reader could not be created (unsupported encoding).
            in.close();
        }
    }

    /**
     * Returns the charset name declared in the connection's Content-Type.
     *
     * @param conn the connection whose Content-Type is inspected
     * @return the declared charset name without surrounding quotes, or
     *         {@code null} if there is no Content-Type or no charset parameter
     */
    private static String getContentEncoding(URLConnection conn) {
        String contentType = conn.getContentType();
        if (contentType == null) {
            return null;
        }
        Matcher m = CHARSET_PATTERN.matcher(contentType);
        return m.find() ? m.group(1) : null;
    }
}
复制代码
欢迎光临 Scripts 学盟 (http://www.iscripts.org/)
Powered by Discuz! X2