数据清洗——将地名转换为标准地址

在数据清洗过程中，行政区域代码的转换最为繁琐。下面的代码借助百度地图 Web 服务 API：先通过地理编码把地名解析成经纬度，再通过逆地理编码把经纬度还原成标准化的结构化地址。

  1 package Util;
  2 
  3 import java.io.IOException;
  4 import java.io.InputStream;
  5 import java.net.HttpURLConnection;
  6 import java.net.MalformedURLException;
  7 import java.net.URL;
  8 import java.util.List;
  9 
 10 import org.dom4j.Document;
 11 import org.dom4j.DocumentException;
 12 import org.dom4j.Element;
 13 import org.dom4j.io.SAXReader;
 14 
 15 public class getregion{
 16              
 17      /* 
 18          * 
 19          * 
 20          * 
 21          * 地址编码
 22          * 
 23          * */
 24      public static String getLocation(String lat,String lng){
 25            String location1="";
 26           String url="http://api.map.baidu.com/reverse_geocoding/v3/?ak=ld0uqubVfSTAUlXH5qIMN2F3Snsp16LU&output=xml&coordtype=wgs84ll&location="+lat+","+lng;
 27           System.out.println(url);
 28           Document doc = null;
 29            java.net.HttpURLConnection conn = null;
 30            InputStream ins = null;
 31            SAXReader reader = null;
 32            try{
 33             //HttpTimeoutHandler hth = new HttpTimeoutHandler(600000);
 34             URL conURL = new URL(null,url);
 35             conn = (HttpURLConnection)conURL.openConnection();
 36             conn.setDoInput(true);
 37             conn.setDoOutput(true);
 38             conn.setUseCaches(false);
 39             ins = conn.getInputStream();
 40             reader =new SAXReader();
 41             doc= reader.read(ins);
 42             //System.out.println(url);
 43             Element root=doc.getRootElement();
 44             String docXmlText=doc.asXML();
 45             //System.out.println(docXmlText);
 46             Element e=root.element("result");
 47             Element location=e.element("formatted_address");
 48             location1=location.asXML();
 49             location1=location1.substring(location1.indexOf("address>")+8,location1.indexOf("</formatted_address>"));
 50            /* System.out.println("lng"+lng1);
 51             System.out.println("lat"+lat1);
 52            // System.out.println("location"+location.asXML());
 53             //System.out.println("xiayukun"+e.asXML());
 54             lng1=lng1.substring(lng1.indexOf("<lng>")+5,lng1.indexOf("</lng>"));
 55             
 56             System.out.println(lng1);
 57             lat1=lat1.substring(lat1.indexOf("<lat>")+5,lat1.indexOf("</lat>"));
 58             System.out.println(lat1);*/
 59             List<Element> list = root.elements("location");
 60             System.out.println(url);
 61             for (Element object : list) {
 62                 System.out.println(url);
 63                 System.out.println(object.getName());
 64                 for (Element element : (List<Element>) object.elements()) {
 65                     System.out.print(((Element) element).getName() + ":");
 66                     System.out.print(element.getText() + " ");
 67                 }
 68                 System.out.println();
 69 
 70             }
 71 
 72             ins.close();
 73             conn.disconnect();
 74            }catch (MalformedURLException e) {
 75             e.printStackTrace();
 76            } catch (IOException e) {
 77             e.printStackTrace();   
 78            } catch (DocumentException e) {
 79             e.printStackTrace();
 80            }catch(Exception e){
 81             e.printStackTrace();
 82            }finally {
 83             try {
 84              if (ins != null) {
 85               ins.close();
 86               ins = null;
 87              }
 88             } catch (IOException e1) {
 89              e1.printStackTrace();
 90             }
 91             try {
 92              if (conn != null) {
 93               conn.disconnect();
 94               conn = null;
 95              }
 96             } catch (Exception e2) {
 97              e2.printStackTrace();
 98             }
 99            }
100            return location1;
101         }
102     /* 
103      * 
104      * 
105      * 
106      * 地址你编码
107      * 
108      * */
109     public static String getlocation1(String loc){
110         String location2="";
111           String url="http://api.map.baidu.com/geocoding/v3/?address="+loc+"10号&output=xml&ak=ld0uqubVfSTAUlXH5qIMN2F3Snsp16LU&callback=showLocation";
112           System.out.println(url);
113           Document doc = null;
114            java.net.HttpURLConnection conn = null;
115            InputStream ins = null;
116            SAXReader reader = null;
117            try{
118             //HttpTimeoutHandler hth = new HttpTimeoutHandler(600000);
119             URL conURL = new URL(null,url);
120             conn = (HttpURLConnection)conURL.openConnection();
121             conn.setDoInput(true);
122             conn.setDoOutput(true);
123             conn.setUseCaches(false);
124             ins = conn.getInputStream();
125             reader =new SAXReader();
126             doc= reader.read(ins);
127             //System.out.println(url);
128             Element root=doc.getRootElement();
129             String docXmlText=doc.asXML();
130             //System.out.println(docXmlText);
131             Element e=root.element("result");
132             Element location=e.element("location");
133             Element lng=location.element("lng");
134             Element lat=location.element("lat");
135             String lng1=lng.asXML();
136             String lat1=lat.asXML();
137             System.out.println("lng"+lng1);
138             System.out.println("lat"+lat1);
139            // System.out.println("location"+location.asXML());
140             //System.out.println("xiayukun"+e.asXML());
141             lng1=lng1.substring(lng1.indexOf("<lng>")+5,lng1.indexOf("</lng>"));
142             
143             System.out.println(lng1);
144             lat1=lat1.substring(lat1.indexOf("<lat>")+5,lat1.indexOf("</lat>"));
145             System.out.println(lat1);
146             location2=getLocation(lat1,lng1);
147             List<Element> list = root.elements("location");
148             System.out.println(url);
149             for (Element object : list) {
150                 System.out.println(url);
151                 System.out.println(object.getName());
152                 for (Element element : (List<Element>) object.elements()) {
153                     System.out.print(((Element) element).getName() + ":");
154                     System.out.print(element.getText() + " ");
155                 }
156                 System.out.println();
157 
158             }
159 
160             ins.close();
161             conn.disconnect();
162            }catch (MalformedURLException e) {
163             e.printStackTrace();
164            } catch (IOException e) {
165             e.printStackTrace();   
166            } catch (DocumentException e) {
167             e.printStackTrace();
168            }catch(Exception e){
169             e.printStackTrace();
170            }finally {
171             try {
172              if (ins != null) {
173               ins.close();
174               ins = null;
175              }
176             } catch (IOException e1) {
177              e1.printStackTrace();
178             }
179             try {
180              if (conn != null) {
181               conn.disconnect();
182               conn = null;
183              }
184             } catch (Exception e2) {
185              e2.printStackTrace();
186             }
187            }
188            return location2;
189         }
190     
191     public static void main(String[] strgs){
192         System.out.println("aaaa");
193         String loc="河北邯郸马头经济开发区";
194         System.out.println(loc);
195         String structloc="";
196         structloc=getlocation1(loc);
197         System.out.println("struct_location:"+structloc);
198         
199         /*//System.out.println(addressResolution("湖北省武汉市洪山区"));
200         //getPoint("河北省迁安市聚鑫街2126号");
201         ArrayList<Map<String,String>> table=new ArrayList<Map<String,String>>();
202         table = addressResolution(structloc);
203         System.out.println(table.size());                
204         for(int i = 0; i < table.size(); i++){
205             System.out.println(table.get(i).get("province")+table.get(i).get("city")+table.get(i).get("county"));
206         }*/
207     }
208     
209     
210     
211 }
原文地址:https://www.cnblogs.com/smartisn/p/11806801.html