onWebView检查网页中文

问题：要检查网页中的一段文本：

开始我是这样写的：

// Chinese text that is expected to appear somewhere in the web page under test.
private final static String SPECIFIED_TEXT = "这个是一段中文";

// Initial attempt: assert on the page using the library-provided webContent assertion.
onWebView().check(webContent(containingTextInNode(SPECIFIED_TEXT )));


然后直接报错了

从 adb logcat 的输出看，网页中的中文显示为乱码；尝试打印长度后发现每个汉字的长度都是 3（很可能是 UTF-8 的 3 个字节被当成了 3 个字符来解析），不过网页结构和其余数据都能正常看到。

可以看到文本数据位于 <p></p> 和 <h2></h2> 标签里面。
不死心，继续尝试：


把检查代码全部从库里面拷贝出来


改成

// Revised check: use the customised userWebContent assertion and look for the text inside <p> nodes.
onWebView().check(userWebContent(containingTextInNode(SPECIFIED_TEXT , "p")));

/**
 * Writes a long string (for example the whole web page) to logcat in several pieces.
 *
 * <p>logcat truncates a single entry at roughly 4 KB, and that limit is counted in bytes,
 * not in chars. A Java {@code char} may need up to 3 bytes in UTF-8 (Chinese text usually
 * does), so the text is cut into pieces of at most 1000 chars to keep every piece below the
 * limit. A surrogate pair is never split across two pieces.
 *
 * @param xml the text to log; a text that fits into one piece is logged as a single entry
 */
public static void logall(String xml) {
    // 1000 chars * 3 bytes (worst case per char in UTF-8) stays below logcat's ~4 KB entry limit.
    final int maxChunkChars = 1000;
    final int length = xml.length();
    if (length <= maxChunkChars) {
        Log.i(TAG, xml);
        return;
    }
    int start = 0;
    while (start < length) {
        int end = Math.min(start + maxChunkChars, length);
        // Do not cut between the two halves of a surrogate pair.
        if (end < length && Character.isHighSurrogate(xml.charAt(end - 1))) {
            end--;
        }
        Log.i(TAG, xml.substring(start, end));
        start = end;
    }
}


/**
     * A WebAssertion which asserts that the document is matched by the provided matcher.
     *
     * <p>The page's {@code document.documentElement.outerHTML} is fetched through a script atom,
     * parsed with jsoup and converted to a W3C {@link Document} before {@code domMatcher} is
     * applied. jsoup is used instead of {@code TagSoupDocumentParser} because the latter did not
     * show the Chinese text correctly, although the raw string returned by the script was intact.
     *
     * @param domMatcher matcher applied to the parsed document; must not be null
     * @return an assertion usable with {@code onWebView().check(...)}
     */
    public static WebAssertion<Document> userWebContent(final Matcher<Document> domMatcher) {
        checkNotNull(domMatcher);
        return webMatches(transform(script("return document.documentElement.outerHTML;"),
                new TransformingAtom.Transformer<Evaluation, Document>() {
                    @Override
                    public Document apply(Evaluation eval) {
                        final Object value = eval.getValue();
                        if (!(value instanceof String)) {
                            throw new RuntimeException("Value should have been a string: " + eval);
                        }
                        try {
                            // outerHTML of the root element is a complete document, so parse it as
                            // one (Jsoup.parse) instead of as a fragment that is assumed to be the
                            // content of <body> (Jsoup.parseBodyFragment).
                            org.jsoup.nodes.Document doc = Jsoup.parse((String) value);
                            // jsoup's Document is not a W3C Document; W3CDom performs the conversion.
                            return new W3CDom().fromJsoup(doc);
                        } catch (RuntimeException se) {
                            throw new RuntimeException("Parse failed: " + value, se);
                        }
                    }
                }), domMatcher,
                new WebViewAssertions.ResultDescriber<Document>() {
                    @Override
                    public String apply(Document document) {
                        // Serialise the DOM back to text so it can be shown in the failure message.
                        try {
                            Transformer tf = TransformerFactory.newInstance().newTransformer();
                            StringWriter writer = new StringWriter();
                            tf.transform(new DOMSource(document), new StreamResult(writer));
                            return writer.toString();
                        } catch (TransformerException e) {
                            return "Could not transform!!!" + e;
                        }
                    }
                });
    }
/**
 * Returns a matcher that matches Documents containing at least one element with tag name
 * {@code nodeNme} whose text content contains the given text.
 *
 * @param text substring to look for; must not be null
 * @param nodeNme tag name of the elements to inspect, e.g. {@code "p"}; must not be null
 * @return a matcher for W3C documents
 */
public static Matcher<Document> containingTextInNode(String text, final String nodeNme) {
    checkNotNull(text);
    // Fail fast here instead of somewhere inside the DOM lookup when the matcher is evaluated.
    checkNotNull(nodeNme);
    return withNodeName(withTextContent(containsString(text)), nodeNme);
}


    /**
     * Builds a {@link Document} matcher that succeeds when at least one element returned by
     * {@code getElementsByTagName(nodeNme)} satisfies the given element matcher.
     *
     * @param bodyMatcher matcher applied to each element with the requested tag name; must not be null
     * @param nodeNme tag name used to select the candidate elements
     * @return a matcher for W3C documents
     */
    public static Matcher<Document> withNodeName(final Matcher<Element> bodyMatcher, final String nodeNme) {
        checkNotNull(bodyMatcher);
        return new TypeSafeMatcher<Document>() {
            @Override
            public boolean matchesSafely(Document document) {
                final NodeList candidates = document.getElementsByTagName(nodeNme);
                // An empty list skips the loop and falls through to "no match".
                int index = 0;
                while (index < candidates.getLength()) {
                    if (bodyMatcher.matches(candidates.item(index))) {
                        return true;
                    }
                    index++;
                }
                return false;
            }

            @Override
            public void describeTo(Description description) {
                bodyMatcher.describeTo(description.appendText("with NodeName: "));
            }
        };
    }

/**
 * Recursively walks a W3C {@link NodeList} and logs every leaf node (a node without children)
 * as {@code <path>-<nodeName>:<textLength> <text>}.
 *
 * <p>A NodeList is the W3C name for an ordered collection of nodes; its items are read by
 * zero-based index.
 *
 * @param nodeList nodes to walk
 * @param path dash-separated names of the ancestors visited so far; {@code ""} for the top level
 */
public static void showNode(NodeList nodeList, String path) {
    for (int i = 0; i < nodeList.getLength(); i++) {
        Node node = nodeList.item(i);
        String nodePath = path + "-" + node.getNodeName();
        NodeList children = node.getChildNodes();
        if (children.getLength() > 0) {
            showNode(children, nodePath);
            continue;
        }
        // Leaf reached: print it. getTextContent() is null for some node types
        // (e.g. DocumentType), so guard the length computation.
        String text = node.getTextContent();
        int textLength = (text == null) ? 0 : text.length();
        Log.i(TAG, nodePath + ":" + textLength + " " + text);
    }
}

//上面用到了 jsoup 库，需要在 gradle 里增加对应的库依赖；
//还要注意把 org.jsoup.nodes.Document 转换为 org.w3c.dom.Document（使用 W3CDom.fromJsoup）

dependencies {
    compile 'org.jsoup:jsoup:1.9.2'
    // The indentation below previously used full-width (U+3000) spaces; plain ASCII spaces are required.
    androidTestCompile 'org.jsoup:jsoup:1.9.2'  // the tests use this one
}

至此可以顺利检查到网页中的中文啦，代码比较乱，将就着先用吧