URL相关的工具类

package com.opslab.util.web;

import com.opslab.util.CharUtil;
import com.opslab.util.CharsetUtil;
import com.opslab.util.StringUtil;

import javax.servlet.http.HttpServletRequest;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* URL相关的工具类
* 以下工具类来自开源项目jodd
* <p/>
* Here is an example of full URL:
* {@literal https://jodd:ddoj@www.jodd.org:8080/file;p=1?q=2#third}.
* It consist of:
* <ul>
* <li>scheme (https)</li>
* <li>user (jodd)</li>
* <li>password (ddoj)</li>
* <li>host (www.jodd.org)</li>
* <li>port (8080)</li>
* <li>path (file)</li>
* <li>path parameter (p=1)</li>
* <li>query parameter (q=2)</li>
* <li>fragment (third)</li>
* </ul>
*/
public final class UrlUtil {
private static final String SCHEME_PATTERN = "([^:/?#]+):";

private static final String HTTP_PATTERN = "(http|https):";

private static final String USERINFO_PATTERN = "([^@/]*)";

private static final String HOST_PATTERN = "([^/?#:]*)";

private static final String PORT_PATTERN = "(\d*)";

private static final String PATH_PATTERN = "([^?#]*)";

private static final String QUERY_PATTERN = "([^#]*)";

private static final String LAST_PATTERN = "(.*)";

// Regex patterns that matches URIs. See RFC 3986, appendix B

private static final Pattern URI_PATTERN = Pattern.compile("^(" + SCHEME_PATTERN + ")?" + "(//(" + USERINFO_PATTERN + "@)?" + HOST_PATTERN + "(:" + PORT_PATTERN +
")?" + ")?" + PATH_PATTERN + "(\?" + QUERY_PATTERN + ")?" + "(#" + LAST_PATTERN + ")?");

private static final Pattern HTTP_URL_PATTERN = Pattern.compile('^' + HTTP_PATTERN + "(//(" + USERINFO_PATTERN + "@)?" + HOST_PATTERN + "(:" + PORT_PATTERN + ")?" + ")?" +
PATH_PATTERN + "(\?" + LAST_PATTERN + ")?");

/**
* Encodes single URI component.
*/
private static String encodeUriComponent(String source, String encoding, URIPart uriPart) {
if (source == null) {
return null;
}

byte[] bytes;
try {
bytes = encodeBytes(source.getBytes(encoding), uriPart);
} catch (UnsupportedEncodingException ignore) {
return null;
}

char[] chars = new char[bytes.length];
for (int i = 0; i < bytes.length; i++) {
chars[i] = (char) bytes[i];
}
return new String(chars);
}

/**
* Encodes byte array using allowed characters from {@link URIPart}.
*/
private static byte[] encodeBytes(byte[] source, URIPart uriPart) {
ByteArrayOutputStream bos = new ByteArrayOutputStream(source.length);
for (byte b : source) {
if (b < 0) {
b += 256;
}
if (uriPart.isValid((char) b)) {
bos.write(b);
} else {
bos.write('%');
char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
bos.write(hex1);
bos.write(hex2);
}
}
return bos.toByteArray();
}

/**
* Encodes the given URI scheme with the given encoding.
*/
public static String encodeScheme(String scheme, String encoding) {
return encodeUriComponent(scheme, encoding, URIPart.SCHEME);
}

public static String encodeScheme(String scheme) {
return encodeUriComponent(scheme, CharsetUtil.UTF_8, URIPart.SCHEME);
}

/**
* Encodes the given URI user info with the given encoding.
*/
public static String encodeUserInfo(String userInfo, String encoding) {
return encodeUriComponent(userInfo, encoding, URIPart.USER_INFO);
}

/* /**
* Encodes the given URI authority with the given encoding.
*

public static String encodeAuthority(String authority, String encoding) {
return encodeUriComponent(authority, encoding, URIPart.AUTHORITY);
}
public static String encodeAuthority(String authority) {
return encodeUriComponent(authority, JoddCore.encoding, URIPart.AUTHORITY);
}
*/

public static String encodeUserInfo(String userInfo) {
return encodeUriComponent(userInfo, CharsetUtil.UTF_8, URIPart.USER_INFO);
}

/**
* Encodes the given URI host with the given encoding.
*/
public static String encodeHost(String host, String encoding) {
return encodeUriComponent(host, encoding, URIPart.HOST);
}

public static String encodeHost(String host) {
return encodeUriComponent(host, CharsetUtil.UTF_8, URIPart.HOST);
}

/**
* Encodes the given URI port with the given encoding.
*/
public static String encodePort(String port, String encoding) {
return encodeUriComponent(port, encoding, URIPart.PORT);
}

public static String encodePort(String port) {
return encodeUriComponent(port, CharsetUtil.UTF_8, URIPart.PORT);
}

/**
* Encodes the given URI path with the given encoding.
*/
public static String encodePath(String path, String encoding) {
return encodeUriComponent(path, encoding, URIPart.PATH);
}

public static String encodePath(String path) {
return encodeUriComponent(path, CharsetUtil.UTF_8, URIPart.PATH);
}

/**
* Encodes the given URI path segment with the given encoding.
*/
public static String encodePathSegment(String segment, String encoding) {
return encodeUriComponent(segment, encoding, URIPart.PATH_SEGMENT);
}

public static String encodePathSegment(String segment) {
return encodeUriComponent(segment, CharsetUtil.UTF_8, URIPart.PATH_SEGMENT);
}

/**
* Encodes the given URI query with the given encoding.
*/
public static String encodeQuery(String query, String encoding) {
return encodeUriComponent(query, encoding, URIPart.QUERY);
}

public static String encodeQuery(String query) {
return encodeUriComponent(query, CharsetUtil.UTF_8, URIPart.QUERY);
}

/**
* Encodes the given URI query parameter with the given encoding.
*/
public static String encodeQueryParam(String queryParam, String encoding) {
return encodeUriComponent(queryParam, encoding, URIPart.QUERY_PARAM);
}

public static String encodeQueryParam(String queryParam) {
return encodeUriComponent(queryParam, CharsetUtil.UTF_8, URIPart.QUERY_PARAM);
}

/**
* Encodes the given URI fragment with the given encoding.
*/
public static String encodeFragment(String fragment, String encoding) {
return encodeUriComponent(fragment, encoding, URIPart.FRAGMENT);
}

public static String encodeFragment(String fragment) {
return encodeUriComponent(fragment, CharsetUtil.UTF_8, URIPart.FRAGMENT);
}

/**
* @see #encode(String, String)
*/
public static String encode(String uri) {
return encode(uri, CharsetUtil.UTF_8);
}

/**
* Encodes the given source URI into an encoded String. All various URI components are
* encoded according to their respective valid character sets.
* <p>This method does <b>not</b> attempt to encode "=" and "{@literal &}"
* characters in query parameter names and query parameter values because they cannot
* be parsed in a reliable way.
*/
public static String encode(String uri, String encoding) {
Matcher m = URI_PATTERN.matcher(uri);
if (m.matches()) {
String scheme = m.group(2);
String authority = m.group(3);
String userinfo = m.group(5);
String host = m.group(6);
String port = m.group(8);
String path = m.group(9);
String query = m.group(11);
String fragment = m.group(13);

return encodeUriComponents(scheme, authority, userinfo, host, port, path, query, fragment, encoding);
}
throw new IllegalArgumentException("Invalid URI: " + uri);
}

/**
* @see #encodeHttpUrl(String, String)
*/
public static String encodeHttpUrl(String httpUrl) {
return encodeHttpUrl(httpUrl, CharsetUtil.UTF_8);
}

/**
* Encodes the given HTTP URI into an encoded String. All various URI components are
* encoded according to their respective valid character sets.
* <p>This method does <b>not</b> support fragments ({@code #}),
* as these are not supposed to be sent to the server, but retained by the client.
* <p>This method does <b>not</b> attempt to encode "=" and "{@literal &}"
* characters in query parameter names and query parameter values because they cannot
* be parsed in a reliable way.
*/
public static String encodeHttpUrl(String httpUrl, String encoding) {
Matcher m = HTTP_URL_PATTERN.matcher(httpUrl);
if (m.matches()) {
String scheme = m.group(1);
String authority = m.group(2);
String userinfo = m.group(4);
String host = m.group(5);
String portString = m.group(7);
String path = m.group(8);
String query = m.group(10);

return encodeUriComponents(scheme, authority, userinfo, host, portString, path, query, null, encoding);
}
throw new IllegalArgumentException("Invalid HTTP URL: " + httpUrl);
}

private static String encodeUriComponents(String scheme, String authority, String userInfo, String host,
String port, String path, String query, String fragment, String encoding) {

StringBuilder sb = new StringBuilder();

if (scheme != null) {
sb.append(encodeScheme(scheme, encoding));
sb.append(':');
}

if (authority != null) {
sb.append("//");
if (userInfo != null) {
sb.append(encodeUserInfo(userInfo, encoding));
sb.append('@');
}
if (host != null) {
sb.append(encodeHost(host, encoding));
}
if (port != null) {
sb.append(':');
sb.append(encodePort(port, encoding));
}
}

sb.append(encodePath(path, encoding));

if (query != null) {
sb.append('?');
sb.append(encodeQuery(query, encoding));
}

if (fragment != null) {
sb.append('#');
sb.append(encodeFragment(fragment, encoding));
}

return sb.toString();
}

/**
* Creates URL builder for user-friendly way of building URLs.
* Provided path is parsed and {@link #encode(String) encoded}.
*
* @see #build(String, boolean)
*/
public static Builder build(String path) {
return build(path, true);
}

/**
* Creates URL builder with given path that can be optionally encoded.
* Since most of the time path is valid and does not require to be encoded,
* use this method to gain some performance. When encoding flag is turned off,
* provided path is used without processing.
* <p/>
* The purpose of builder is to help with query parameters. All other URI parts
* should be set previously or after the URL is built.
*/
public static Builder build(String path, boolean encodePath) {
return new Builder(path, encodePath, CharsetUtil.UTF_8);
}

/**
* Decodes URL elements.
*/
public static String decode(String url) {
return decode(url, CharsetUtil.UTF_8, false);
}

/**
* Decodes URL elements. This method may be used for all
* parts of URL, except for the query parts, since it does
* not decode the '+' character.
*
* @see #decodeQuery(String, String)
*/
public static String decode(String source, String encoding) {
return decode(source, encoding, false);
}

/**
* Decodes query name or value.
*/
public static String decodeQuery(String source) {
return decode(source, CharsetUtil.UTF_8, true);
}

/**
* Decodes query name or value.
*/
public static String decodeQuery(String source, String encoding) {
return decode(source, encoding, true);
}

private static String decode(String source, String encoding, boolean decodePlus) {
int length = source.length();
ByteArrayOutputStream bos = new ByteArrayOutputStream(length);

boolean changed = false;

for (int i = 0; i < length; i++) {
int ch = source.charAt(i);
switch (ch) {
case '%':
if ((i + 2) < length) {
char hex1 = source.charAt(i + 1);
char hex2 = source.charAt(i + 2);
int u = Character.digit(hex1, 16);
int l = Character.digit(hex2, 16);
if (u == -1 || l == -1) {
throw new IllegalArgumentException("Invalid sequence: " + source.substring(i));
}
bos.write((char) ((u << 4) + l));
i += 2;
changed = true;
} else {
throw new IllegalArgumentException("Invalid sequence: " + source.substring(i));
}
break;

case '+':
if (decodePlus) {
ch = ' ';
changed = true;
}

default:
bos.write(ch);
}
}
try {
return changed ? new String(bos.toByteArray(), encoding) : source;
} catch (UnsupportedEncodingException ignore) {
return null;
}
}

/**
* Enumeration to identify the parts of a URI.
* <p/>
* Contains methods to indicate whether a given character is valid in a specific URI component.
*
* @see <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>
*/
enum URIPart {

SCHEME {
@Override
public boolean isValid(char c) {
return CharUtil.isAlpha(c) || CharUtil.isDigit(c) || c == '+' || c == '-' || c == '.';
}
},
// AUTHORITY {
// @Override
// public boolean isValid(char c) {
// return isUnreserved(c) || isSubDelimiter(c) || c == ':' || c == '@';
// }
// },
USER_INFO {
@Override
public boolean isValid(char c) {
return CharUtil.isUnreserved(c) || CharUtil.isSubDelimiter(c) || c == ':';
}
},
HOST {
@Override
public boolean isValid(char c) {
return CharUtil.isUnreserved(c) || CharUtil.isSubDelimiter(c);
}
},
PORT {
@Override
public boolean isValid(char c) {
return CharUtil.isDigit(c);
}
},
PATH {
@Override
public boolean isValid(char c) {
return CharUtil.isPchar(c) || c == '/';
}
},
PATH_SEGMENT {
@Override
public boolean isValid(char c) {
return CharUtil.isPchar(c);
}
},
QUERY {
@Override
public boolean isValid(char c) {
return CharUtil.isPchar(c) || c == '/' || c == '?';
}
},
QUERY_PARAM {
@Override
public boolean isValid(char c) {
if (c == '=' || c == '+' || c == '&' || c == ';') {
return false;
}
return CharUtil.isPchar(c) || c == '/' || c == '?';
}
},
FRAGMENT {
@Override
public boolean isValid(char c) {
return CharUtil.isPchar(c) || c == '/' || c == '?';
}
};

/**
* Indicates whether the given character is allowed in this URI component.
*
* @return <code>true</code> if the character is allowed; {@code false} otherwise
*/
public abstract boolean isValid(char c);

}

public static class Builder {
protected final StringBuilder url;
protected final String encoding;
protected boolean hasParams;

public Builder(String path, boolean encodePath, String encoding) {
this.encoding = encoding;
url = new StringBuilder();
if (encodePath) {
url.append(encode(path, encoding));
} else {
url.append(path);
}
this.hasParams = url.indexOf("?") != -1;
}

/**
* Appends new query parameter to the url.
*/
public Builder queryParam(String name, String value) {
url.append(hasParams ? '&' : '?');
hasParams = true;

url.append(encodeQueryParam(name, encoding));

if ((value != null) && (value.length() > 0)) {
url.append('=');
url.append(encodeQueryParam(value, encoding));
}
return this;
}

/**
* Returns full URL.
*/
@Override
public String toString() {
return url.toString();
}
}

public final static String setParam(String url, String paramName, String paramValue) {
int temp_index = url.indexOf("?");
if (temp_index != -1) {
int param_index = url.indexOf(paramName + "=", temp_index + 1);
if (param_index != -1) {
temp_index = url.indexOf("&", param_index + paramName.length() + 1);
if (temp_index != -1) {
return url.substring(0, param_index) + paramName + "=" + paramValue + url.substring(temp_index);
}
return url.substring(0, param_index) + paramName + "=" + paramValue;
} else {
if (url.lastIndexOf("&") == url.length() - 1) {
return url + paramName + "=" + paramValue;
}
return url + "&" + paramName + "=" + paramValue;
}
} else {
return url + "?" + paramName + "=" + paramValue;
}
}

public static final String getParamValue(String url, String paramName) {
int temp_index = url.indexOf("?");
if (temp_index != -1) {
int param_index = url.indexOf(paramName + "=", temp_index + 1);
if (param_index != -1) {
temp_index = url.indexOf("&", param_index + paramName.length() + 1);
if (temp_index != -1) {
return url.substring(param_index + paramName.length() + 1, temp_index);
}
return url.substring(param_index + paramName.length() + 1);
} else {
return null;
}
} else {
return null;
}
}

public static final String removeParam(String url, String... paramNames) {
for (String paramName : paramNames) {
url = removeParam(url, paramName);
}
return url;
}

public static final String removeParam(String url, String paramName) {
int temp_index = url.indexOf("?");
if (temp_index != -1) {
int param_index = url.indexOf(paramName + "=", temp_index + 1);
if (param_index != -1) {
temp_index = url.indexOf("&", param_index + paramName.length() + 1);
if (temp_index != -1) {
return url.substring(0, param_index) + url.substring(temp_index + 1);
}
return url.substring(0, param_index - 1);

} else {
return url;
}
} else {
return url;
}
}


/**
* Example:
* <pre>
* {@code
* url: http://tt.se/ Location: /start => http://tt.se/start
* url: http://localhost/moved_perm Location: / => http://localhost/
* url: http://github.com/ Location: http://github.com/ => https://github.com/
* }
*
* (If the new url throws a MalformedURLException the url String representation will be returned.)
*/
public static String urlJoin(URL url, String locationHeader) {
try {
if (locationHeader.startsWith("http")) {
return new URL(locationHeader).toString();
}
return new URL(url.getProtocol() + "://" + url.getAuthority() + locationHeader).toString();
} catch (MalformedURLException e) {
return url.toString();
}

}


/**
* 获取HTTPrequest的请求参数
* @param request http请求
*/
public static Map print(HttpServletRequest request) {
Map<String,String> map = new HashMap<>();
Enumeration paramNames = request.getParameterNames();
while (paramNames.hasMoreElements()) {
String paramName = (String) paramNames.nextElement();
String[] paramValues = request.getParameterValues(paramName);
if (paramValues.length == 1) {
String paramValue = paramValues[0];
if (paramValue.length() != 0) {
map.put(paramName, paramValue);
}
}
}
return map;
}


/**
* 解析字符串返回map键值对(例:a=1&b=2 => a=1,b=2)
*
* @param query 源参数字符串
* @param split1 键值对之间的分隔符(例:&)
* @param split2 key与value之间的分隔符(例:=)
* @param dupLink 重复参数名的参数值之间的连接符,连接后的字符串作为该参数的参数值,可为null
* null:不允许重复参数名出现,则靠后的参数值会覆盖掉靠前的参数值。
* @return map
*/
public static Map<String,String> parseQuery(String query, char split1, char split2, String dupLink) {
if (!StringUtil.isEmpty(query) && query.indexOf(split2) > 0) {
Map<String,String> result = new HashMap<>();

String name = null;
String value = null;
String tempValue = "";
int len = query.length();
for (int i = 0; i < len; i++) {
char c = query.charAt(i);
if (c == split2) {
value = "";
} else if (c == split1) {
if (!StringUtil.isEmpty(name) && value != null) {
if (dupLink != null) {
tempValue = result.get(name);
if (tempValue != null) {
value += dupLink + tempValue;
}
}
result.put(name, value);
}
name = null;
value = null;
} else if (value != null) {
value += c;
} else {
name = (name != null) ? (name + c) : "" + c;
}
}

if (!StringUtil.isEmpty(name) && value != null) {
if (dupLink != null) {
tempValue = result.get(name);
if (tempValue != null) {
value += dupLink + tempValue;
}
}
result.put(name, value);
}

return result;
}
return null;
}

/**
* 解析http请求URI
* @param queryUri http请求的uri
*/
public static Map<String,String> httpParseQuery(String queryUri){
Map<String,String> result = new HashMap<>();
if(!StringUtil.isEmpty(queryUri)){
result = parseQuery(queryUri,'&','=',",");
}
return result;
}
}

原文地址:https://www.cnblogs.com/chinaifae/p/10254786.html