WebMagic 处理 GET 请求

例如URL:

http://localhost:8080/javascritp/queryList/?params={"name":"aaa","limit":"45","sl":"show"} 

由于参数值是 JSON 字符串,其中的花括号和双引号等字符不能直接出现在 URL 中,因此应该把 URL 分成两部分来处理,只对参数值部分进行 URL 编码,如:

String url1 = "http://localhost:8080/javascritp/queryList/?params=";

String url2 = URLEncoder.encode("{"name":"aaa","limit":"45","sl":"show"}","UTF-8");

//请求URL


/**
 * Sends an HTTP GET request and returns the response body as a string.
 *
 * <p>URLs that start with {@code https} are executed with a client obtained from
 * {@code createSSLInsecureClient()}, which is closed again before this method returns. All other
 * URLs are executed with the shared {@code HttpClientUtils.client}, which is left open.
 *
 * @param url the full request URL; any query parameter values must already be URL-encoded
 * @param headers request headers to add to the request; may be {@code null} or empty
 * @param charset charset name handed to {@code IOUtils.toString} to decode the response body
 * @param connTimeout value passed to {@code RequestConfig.Builder#setConnectTimeout}, or
 *     {@code null} to keep the builder's default
 * @param readTimeout value passed to {@code RequestConfig.Builder#setSocketTimeout}, or
 *     {@code null} to keep the builder's default
 * @return the response body, or an empty string when the response carries no entity
 * @throws ConnectTimeoutException declared so callers can handle connect timeouts separately
 * @throws SocketTimeoutException declared so callers can handle read timeouts separately
 * @throws Exception any other failure raised while executing the request or reading the body
 */
public static String get(String url, Map<String, String> headers, String charset, Integer connTimeout,
        Integer readTimeout) throws ConnectTimeoutException, SocketTimeoutException, Exception {
    HttpGet get = new HttpGet(url);
    // Decide once whether this is an HTTPS request; the answer also controls client clean-up below.
    boolean https = url.startsWith("https");
    HttpClient client = null;
    String result = "";
    try {
        if (headers != null && !headers.isEmpty()) {
            for (Entry<String, String> entry : headers.entrySet()) {
                get.addHeader(entry.getKey(), entry.getValue());
            }
        }

        // Apply only the timeouts the caller actually supplied.
        Builder customReqConf = RequestConfig.custom();
        if (connTimeout != null) {
            customReqConf.setConnectTimeout(connTimeout);
        }
        if (readTimeout != null) {
            customReqConf.setSocketTimeout(readTimeout);
        }
        get.setConfig(customReqConf.build());

        if (https) {
            // HTTPS: use a dedicated client created for this call.
            client = createSSLInsecureClient();
        } else {
            // HTTP: reuse the shared client.
            client = HttpClientUtils.client;
        }
        HttpResponse res = client.execute(get);

        // A response without a body has no entity; keep the empty default instead of
        // failing with a NullPointerException.
        if (res.getEntity() != null) {
            result = IOUtils.toString(res.getEntity().getContent(), charset);
        }
    } finally {
        get.releaseConnection();
        // Only the per-call HTTPS client is closed; the shared HTTP client stays open.
        // instanceof is false for null, so no separate null check is needed.
        if (https && client instanceof CloseableHttpClient) {
            ((CloseableHttpClient) client).close();
        }
    }
    return result;
}

// Handle the GET request with WebMagic: build a Request that targets the
// already-encoded URL (fixed part + encoded parameter value).
Request request = new Request();
request.setUrl(url1 + url2);
request.setMethod(HttpConstant.Method.GET);

// Configure the crawler with the page processor, the pipeline and the request above,
// then start it.
Spider spider = Spider.create(new PageProcessor())
        .addPipeline(new Pipline())
        .addRequest(request);
spider.run();

 
原文地址:https://www.cnblogs.com/joinlemon/p/9541863.html