2020春季学期第四周学习总结

本周是开学的第四周,主要学习了数据的地域处理、分类处理以及关键词提取等内容。

学习了百度地图 API、jieba 分词以及 Python 的相关知识。

package com.diyu;

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.List;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/**
 * Small client for the Baidu Map geocoding and reverse-geocoding web services.
 *
 * <p>Every lookup issues a blocking HTTP GET, parses the XML response with dom4j
 * and returns one text field from it. All public methods return an empty string
 * instead of throwing when the request fails or the expected element is absent.
 *
 * <p>Failures are reported on stderr. Request URLs are deliberately not printed
 * because they contain the API key.
 */
public class getregion {

    /** Value sent as the {@code ak} query parameter; this is a placeholder for a real key. */
    private static final String AK = "您的ak";

    private static final String REVERSE_GEOCODING_URL =
            "http://api.map.baidu.com/reverse_geocoding/v3/";

    private static final String GEOCODING_URL =
            "http://api.map.baidu.com/geocoding/v3/";

    /** Connect and read timeout, so a stalled server cannot block the caller forever. */
    private static final int TIMEOUT_MILLIS = 10000;

    /**
     * Reverse geocoding: resolves a coordinate to its formatted address.
     *
     * <p>The request is sent with {@code coordtype=wgs84ll}.
     *
     * @param lat latitude as a decimal string
     * @param lng longitude as a decimal string
     * @return the text of {@code result/formatted_address}, or {@code ""} when an
     *         argument is blank, the request fails, or the element is missing
     */
    public static String getLocation(String lat, String lng) {
        if (isBlank(lat) || isBlank(lng)) {
            return "";
        }
        try {
            String url = REVERSE_GEOCODING_URL + "?ak=" + AK
                    + "&output=xml&coordtype=wgs84ll&location="
                    + encode(lat.trim()) + "," + encode(lng.trim());
            return childText(fetchResult(url), "formatted_address");
        } catch (Exception e) {
            // Boundary catch: callers rely on this method never throwing.
            reportFailure("reverse geocoding (formatted address)", e);
            return "";
        }
    }

    /**
     * Reverse geocoding: resolves a coordinate to its administrative area code.
     *
     * <p>No {@code coordtype} is sent, so the service's default coordinate type applies.
     *
     * @param lat latitude as a decimal string
     * @param lng longitude as a decimal string
     * @return the text of {@code result/addressComponent/adcode}, or {@code ""} when
     *         an argument is blank, the request fails, or the element is missing
     */
    public static String getLocation2(String lat, String lng) {
        if (isBlank(lat) || isBlank(lng)) {
            return "";
        }
        try {
            // output=xml is requested explicitly because the response is parsed as XML.
            String url = REVERSE_GEOCODING_URL + "?ak=" + AK
                    + "&output=xml&location="
                    + encode(lat.trim()) + "," + encode(lng.trim());
            Element result = fetchResult(url);
            Element component = (result == null) ? null : result.element("addressComponent");
            return childText(component, "adcode");
        } catch (Exception e) {
            // Boundary catch: callers rely on this method never throwing.
            reportFailure("reverse geocoding (adcode)", e);
            return "";
        }
    }

    /**
     * Geocoding followed by reverse geocoding: turns a free-text address into a
     * coordinate and then into the administrative area code of that coordinate.
     *
     * @param loc the address text; it is URL-encoded as UTF-8 before being sent
     * @return the area code from {@link #getLocation2(String, String)}, or {@code ""}
     *         when {@code loc} is blank, the request fails, or no coordinate is returned
     */
    public static String getlocation1(String loc) {
        if (isBlank(loc)) {
            return "";
        }
        try {
            String url = GEOCODING_URL + "?address=" + encode(loc)
                    + "&output=xml&ak=" + AK + "&callback=showLocation";
            Element result = fetchResult(url);
            Element point = (result == null) ? null : result.element("location");
            String lng = childText(point, "lng");
            String lat = childText(point, "lat");
            if (lng.length() == 0 || lat.length() == 0) {
                return "";
            }
            return getLocation2(lat, lng);
        } catch (Exception e) {
            // Boundary catch: callers rely on this method never throwing.
            reportFailure("geocoding", e);
            return "";
        }
    }

    /**
     * Looks up the administrative area code for an address.
     *
     * @param name the address text
     * @return the result of {@link #getlocation1(String)}
     */
    public static String SELECT(String name) {
        return getlocation1(name);
    }

    /**
     * Same lookup as {@link #SELECT(String)}; both delegate to {@link #getlocation1(String)}.
     *
     * @param name the address text
     * @return the result of {@link #getlocation1(String)}
     */
    public static String SELECT1(String name) {
        return getlocation1(name);
    }

    /** Performs a GET on {@code url}, parses the body as XML and returns the root's {@code result} child (may be null). */
    private static Element fetchResult(String url) throws IOException, DocumentException {
        HttpURLConnection conn = null;
        InputStream ins = null;
        try {
            conn = (HttpURLConnection) new URL(url).openConnection();
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            conn.setReadTimeout(TIMEOUT_MILLIS);
            conn.setUseCaches(false);
            ins = conn.getInputStream();
            // The document is fully read into memory here, so the returned element
            // stays usable after the stream is closed below.
            Document doc = new SAXReader().read(ins);
            Element root = doc.getRootElement();
            return (root == null) ? null : root.element("result");
        } finally {
            if (ins != null) {
                try {
                    ins.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails; the response was already handled.
                }
            }
            if (conn != null) {
                conn.disconnect();
            }
        }
    }

    /** Returns the trimmed text of the named child of {@code parent}, or {@code ""} when either is missing. */
    private static String childText(Element parent, String childName) {
        if (parent == null) {
            return "";
        }
        Element child = parent.element(childName);
        if (child == null) {
            return "";
        }
        String text = child.getText();
        return (text == null) ? "" : text.trim();
    }

    /** URL-encodes a query-parameter value as UTF-8. */
    private static String encode(String value) throws IOException {
        return URLEncoder.encode(value, "UTF-8");
    }

    /** Returns true when {@code s} is null or contains only whitespace. */
    private static boolean isBlank(String s) {
        return s == null || s.trim().length() == 0;
    }

    /** Reports a failed lookup on stderr without exposing the request URL. */
    private static void reportFailure(String operation, Exception e) {
        System.err.println("getregion: " + operation + " failed: " + e);
        e.printStackTrace();
    }
}
#coding:utf-8
import jieba
import jieba.analyse

def select(text):
    """Extract the top keywords of ``text`` and return them space-separated.

    Keywords come from ``jieba.analyse.extract_tags`` with ``topK=6`` and no
    part-of-speech filter. The joined string is also printed to stdout.

    :param text: the text to extract keywords from.
    :return: at most six keywords joined by single spaces; ``""`` if none are found.
    """
    # The earlier segmentation / stop-word pass and the weighted top-5 extraction
    # produced values that were never used (and left stopwords.txt open), so they
    # are gone; the returned keywords are unchanged.
    keywords = jieba.analyse.extract_tags(text, topK=6, allowPOS=())
    joined = " ".join(keywords)
    print(joined)
    return joined
if __name__ == "__main__":
    # Demo entry point: run keyword extraction on a placeholder text.
    sample_text = "要分析的字段"
    select(sample_text)
# Parameters of jieba.analyse.extract_tags, for reference:
# text: the text to extract keywords from.
# topK: how many of the highest TF/IDF-weighted keywords to return (default 20).
# withWeight: whether to also return each keyword's weight (default False).
# allowPOS: only include words with the given parts of speech (default empty, i.e. no filtering).
原文地址:https://www.cnblogs.com/lover995/p/12507942.html