代理抓取页面,获得访问地址的最终跳转地址 悟寰轩

 1 <%@page import="java.net.URI"%>
 2 <%@page import="java.io.IOException"%>
 3 <%@page import="org.apache.http.HttpHost"%>
 4 <%@page import="org.apache.http.HttpResponse"%>
 5 <%@page import="org.apache.http.HttpStatus"%>
 6 <%@page import="org.apache.http.client.ClientProtocolException"%>
 7 <%@page import="org.apache.http.client.methods.HttpGet"%>
 8 <%@page import="org.apache.http.client.methods.HttpUriRequest"%>
 9 <%@page import="org.apache.http.impl.client.DefaultHttpClient"%>
10 <%@page import="org.apache.http.protocol.BasicHttpContext"%>
11 <%@page import="org.apache.http.protocol.ExecutionContext"%>
12 <%@page import="org.apache.http.protocol.HttpContext"%>
13 <%@page import="org.apache.http.client.utils.URLEncodedUtils"%>
14 <%@page import="java.net.URLEncoder"%>
15 <%@page import="java.io.UnsupportedEncodingException"%>
16 <%@page import="org.apache.http.impl.client.DefaultRedirectHandler"%>
17 <%@page import="org.apache.http.ProtocolException"%>
18 <%@page import="org.apache.http.Header"%>
19 <%@page import="java.net.URISyntaxException"%>
20 <%@ taglib uri="http://java.sun.com/jstl/core" prefix="c" %>
21 <%@ taglib uri="http://www.duxiu.com/proxy" prefix="proxy" %>
22 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
23 <%!
24  class CustomRedirectHandler extends DefaultRedirectHandler {
25     @Override
26     public URI getLocationURI(HttpResponse response, HttpContext context) throws ProtocolException{
27         if(isRedirectRequested( response, context))
28         {
29             Header locationHeader = response.getFirstHeader("location");
30             String location= locationHeader.getValue();
31             if(location!=null&&!"".equals(location)&&!location.startsWith("http")&&location.contains("---")){
32                 response.removeHeaders("location");
33                 response.setHeader("location","-----"+location);
34                 URI uri=null;
35                 try {
36                     uri =  new URI("------"+location.substring(0, location.lastIndexOf("url=") + 4)
37                             + URLEncoder.encode(location.substring(location.indexOf("url=") + 4, location.length())));
38                 } catch (URISyntaxException e) {
39                     e.printStackTrace();
40                 }
41                 return uri;
42              }
43         }
44         return super.getLocationURI(response,context);
45         
46     }
47 }
48 %>
<%!
/**
 * Requests {@code url} with an HTTP GET, following redirects through
 * {@link CustomRedirectHandler}, and returns the address of the last request
 * that was actually executed.
 *
 * <p>If the request fails with an I/O or protocol error, the error is printed
 * to standard error and the original {@code url} is returned unchanged, so a
 * caller comparing input and output sees "no redirect". A non-200 final status
 * is reported on standard error but does not change the returned address.
 *
 * @param url the address to request
 * @return the final address after redirects, or {@code url} itself when the
 *         request could not be executed
 */
public String test1(String url) {
    DefaultHttpClient httpClient = new DefaultHttpClient();
    try {
        httpClient.setRedirectHandler(new CustomRedirectHandler());
        HttpGet httpget = new HttpGet(url);
        HttpContext context = new BasicHttpContext();

        HttpResponse response;
        try {
            response = httpClient.execute(httpget, context);
        } catch (IOException e) {
            // ClientProtocolException is an IOException, so one handler covers both.
            e.printStackTrace();
            return url;
        }

        if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
            System.err.println("Unexpected response for " + url + ": " + response.getStatusLine());
        }

        // After execution the context holds the last request sent and its target host.
        HttpUriRequest currentReq = (HttpUriRequest) context.getAttribute(ExecutionContext.HTTP_REQUEST);
        HttpHost currentHost = (HttpHost) context.getAttribute(ExecutionContext.HTTP_TARGET_HOST);
        URI currentUri = currentReq.getURI();
        if (currentUri.isAbsolute()) {
            return currentUri.toString();
        }
        return currentHost.toURI() + currentUri;
    } finally {
        // Release pooled connections; the client is created per call and not reused.
        httpClient.getConnectionManager().shutdown();
    }
}
%>
<%
// Page entry point: reads the "dx" request parameter, resolves where it
// finally redirects to, and issues a redirect when that differs from the input.
String dx = request.getParameter("dx");
if (dx == null || "".equals(dx)) {
    out.println("dx为空!");
    return;
}

// NOTE(review): the value of "dx" is fetched server-side as-is. If this page is
// reachable by untrusted clients, restrict the allowed hosts to avoid
// server-side request forgery.
String url = dx;

// HTML-escape the caller-supplied value before echoing it into the page;
// writing it raw lets a crafted "dx" inject markup or script.
String escapedUrl = url.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace("\"", "&quot;")
        .replace("'", "&#39;");
out.println("url=" + escapedUrl);

// Resolve the final address after following redirects.
String finalURL = test1(url);
if (!url.equals(finalURL)) {
    // NOTE(review): the redirect target below is a fixed literal that reads like
    // a placeholder rather than a real address; confirm the intended target.
    response.sendRedirect("最终跳转地址");
}
%>

继承 DefaultRedirectHandler,重写 getLocationURI 方法,以自定义重定向地址(URI)的获取方式。

原文地址:https://www.cnblogs.com/sunxucool/p/2821913.html