xml的解析与创建——bing到youdao导入文件的转换

        首先是为了解决一个问题:如何将必应单词本中记录的单词转入到有道词典中去。实际上,必应词典可以导出xml文件,但是该文件有道词典无法解析。这里涉及到xml的解析和创建了。

        代码如下:

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.SimpleDateFormat;
import java.util.Date;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;


/**
 * Converts a word list exported from Bing Dictionary (XML) into a
 * {@code <wordbook>} XML file intended for import into Youdao Dictionary.
 *
 * <p>Expected input layout: the first child element of the root is the word
 * container, and each child element of that container is one word entry with
 * optional {@code Eng}, {@code Phonetic}, {@code Defi} and {@code Date}
 * children. Entries are copied only when their {@code Date} text compares
 * greater than or equal to the filtering time (plain string comparison, so
 * both sides are expected to start with {@code yyyy-MM-dd}).
 *
 * <p>The result is always written to {@link #youdao} in the working directory.
 */
public class bing2youdao {
	/** Path of the Bing Dictionary export to read. */
	private final String bingPath;
	/** Inclusive cut-off date; entries recorded before it are dropped. */
	private final String time;
	/** Text written into every {@code <tags>} element; {@code null} leaves it empty. */
	private final String tag;
	/** Name of the generated output file. */
	final static String youdao="youdao.xml";

	/**
	 * Creates a converter whose filtering time is today's date.
	 *
	 * @param filename path of the Bing Dictionary XML export
	 */
	public bing2youdao(String filename){
		this(filename, new SimpleDateFormat("yyyy-MM-dd").format(new Date()));
	}

	/**
	 * Creates a converter with an explicit filtering time.
	 *
	 * @param filename path of the Bing Dictionary XML export
	 * @param d        inclusive cut-off date, formatted {@code yyyy-MM-dd}
	 */
	public bing2youdao(String filename,String d){
		this(filename, d, null);
	}

	/**
	 * Creates a converter with an explicit filtering time and tag.
	 *
	 * @param filename path of the Bing Dictionary XML export
	 * @param d        inclusive cut-off date, formatted {@code yyyy-MM-dd}
	 * @param t        tag text stored with every converted word, or {@code null}
	 */
	public bing2youdao(String filename,String d,String t){
		bingPath=filename;
		time=d;
		tag=t;
	}

	/**
	 * Reads the Bing export, converts the entries that pass the date filter
	 * and writes them to {@link #youdao}.
	 *
	 * <p>Never throws: any failure is reported on standard error together
	 * with the input path and the exception that caused it.
	 */
	public void run(){
		try {
			Document wordbook = convert();
			write(wordbook);
			System.out.println("【必应词典】转换为【有道词典】成功!");
		}
		catch (Exception ex) {
			// Boundary of the tool: report instead of propagating, and print the
			// whole exception because getMessage() alone is frequently null.
			System.err.println("bing2youdao: failed to convert '" + bingPath + "': " + ex);
		}
	}

	/** Parses the input file and builds the output document from it. */
	private Document convert() throws ParserConfigurationException, SAXException, IOException {
		DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
		Document bing = builder.parse(new File(bingPath));

		Document result = builder.newDocument();
		Element wordbook = result.createElement("wordbook");
		result.appendChild(wordbook);

		// Look for the first child ELEMENT rather than a fixed index so the
		// result does not depend on whitespace text nodes in the input.
		Element container = firstChildElement(bing.getDocumentElement());
		if (container == null) {
			throw new IllegalArgumentException("no word list element found under the root element");
		}

		NodeList entries = container.getChildNodes();
		for (int i = 0; i < entries.getLength(); i++) {
			Node entry = entries.item(i);
			// Skip indentation text, comments and other non-element nodes.
			if (entry.getNodeType() == Node.ELEMENT_NODE) {
				appendItem(result, wordbook, entry);
			}
		}
		return result;
	}

	/** Appends one {@code <item>} for the entry if it has a word and passes the date filter. */
	private void appendItem(Document doc, Element wordbook, Node entry) {
		// Declared per entry so a missing field can never inherit the value
		// read from the previous entry.
		String word = null;
		String pronunciation = null;
		String definition = null;
		String date = null;

		NodeList details = entry.getChildNodes();
		for (int j = 0; j < details.getLength(); j++) {
			Node detail = details.item(j);
			String name = detail.getNodeName();
			if ("Eng".equals(name)) {
				word = detail.getTextContent();
			} else if ("Phonetic".equals(name)) {
				pronunciation = detail.getTextContent();
			} else if ("Defi".equals(name)) {
				definition = detail.getTextContent();
			} else if ("Date".equals(name)) {
				date = detail.getTextContent();
			}
		}

		// Output only if there is a word and it was recorded on or after the filtering time.
		if (word == null || !timeDecision(date)) {
			return;
		}

		Element item = doc.createElement("item");
		appendTextElement(doc, item, "word", word);
		appendTextElement(doc, item, "trans", definition);
		appendTextElement(doc, item, "phonetic", pronunciation);
		appendTextElement(doc, item, "tags", tag);
		// Review progress: defaults to 1; -1 keeps the word out of the review plan.
		appendTextElement(doc, item, "progress", "1");
		wordbook.appendChild(item);
	}

	/** Appends {@code <name>text</name>} to parent; a null or empty text yields an empty element. */
	private static void appendTextElement(Document doc, Element parent, String name, String text) {
		Element element = doc.createElement(name);
		if (text != null && text.length() > 0) {
			element.appendChild(doc.createTextNode(text));
		}
		parent.appendChild(element);
	}

	/** Returns the first child of parent that is an element, or {@code null} if there is none. */
	private static Element firstChildElement(Node parent) {
		for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
			if (child.getNodeType() == Node.ELEMENT_NODE) {
				return (Element) child;
			}
		}
		return null;
	}

	/** Serializes the document to {@link #youdao} as indented UTF-8 XML. */
	private void write(Document doc) throws TransformerException, IOException {
		Transformer transformer = TransformerFactory.newInstance().newTransformer();
		transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
		transformer.setOutputProperty(OutputKeys.INDENT, "yes");

		// Hand the transformer a byte stream, not a Writer: a Writer would
		// re-encode with the platform default charset and contradict the
		// UTF-8 encoding declared in the XML header.
		FileOutputStream out = new FileOutputStream(youdao);
		try {
			transformer.transform(new DOMSource(doc), new StreamResult(out));
		} finally {
			out.close();
		}
	}

	/**
	 * Filtering function.
	 *
	 * @param data recorded date of an entry, may be {@code null}
	 * @return {@code true} when data is present and not before the filtering time
	 */
	private boolean timeDecision(String data){
		return data != null && data.compareTo(time) >= 0;
	}
}




使用说明

      (1)将必应词典导出到本地文件“bing.xml”,放入工程根目录下。

      (2)定义 bing2youdao对象,可以使用三种构造函数: 

                new bing2youdao("bing.xml");//不指定日期时,默认以当天日期作为过滤时间
                new bing2youdao("bing.xml","yyyy-MM-dd");//表示仅转换"yyyy-MM-dd"(含)之后记录的单词
                new bing2youdao("bing.xml","yyyy-MM-dd","newClass");//表示将转换的单词归为"newClass"类

      (3)调用run()生成“youdao.xml”,并将其导入到有道词典单词本即可。本例默认的复习进度为1,若希望其不加入复习计划,将其修改为“-1”即可。

 

如:

	 /**
	  * Example entry point: builds a converter for "bing.xml" with the
	  * filtering date "2015-10-21" and the tag "CC", then runs it.
	  */
	 public static void main(String[] args) {
		 new bing2youdao("bing.xml", "2015-10-21", "CC").run();
	 }
     上例仅转换2015年10月21日(含)及之后保存到必应词典中的单词,并将其统一归为“CC”类。



原文地址:https://www.cnblogs.com/engineerLF/p/5392961.html