package com.zhsoft88.commons.tests;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.io.IOUtils;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentHelper;

import com.zhsoft88.commons.Seastar;
import com.zhsoft88.commons.Seastar.SeastarResult;

/**
 * Manual smoke test for {@link Seastar}.
 * <p>
 * Downloads a web page, passes its text through {@link Seastar#structString},
 * parses the returned contents as XML with dom4j, and prints the distinct
 * absolute URLs referenced by {@code a/@href}, {@code frame/@src} and
 * {@code iframe/@src} attributes, together with simple timing figures.
 *
 * @author zhsoft88
 * @since 2008-08-03
 */
public class TestSeastar2 {

	/** Page fetched when no URL is given on the command line. */
	private static final String DEFAULT_URL = "http://www.sohu.com";

	/** Charset used to decode the response body when none is given on the command line. */
	private static final String DEFAULT_CHARSET = "gbk";

	/** XPath selecting every link-carrying attribute this test reports. */
	private static final String LINK_XPATH = "//a/@href|//frame/@src|//iframe/@src";

	/**
	 * Runs the test.
	 *
	 * @param args optional arguments: {@code args[0]} is the URL to fetch
	 *             (default {@value #DEFAULT_URL}); {@code args[1]} is the charset
	 *             used to decode the body (default {@value #DEFAULT_CHARSET})
	 * @throws Exception if the page cannot be fetched or the Seastar output
	 *                   cannot be parsed as XML
	 */
	public static void main(String[] args) throws Exception {
		String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
		String charset = (args != null && args.length > 1) ? args[1] : DEFAULT_CHARSET;
		URL base = new URL(url);

		long t1 = System.currentTimeMillis();
		String origContent = fetch(url, charset);
		long t2 = System.currentTimeMillis();
		System.out.println("httpclient: "+(t2-t1)+" ms");

		Seastar ss = new Seastar();
		SeastarResult result = ss.structString(origContent);
		System.out.println("seastar: "+result.getElapsedTime()+" ms");

		Document doc = DocumentHelper.parseText(result.getContents());
		// List<?> accepts both the raw List returned by dom4j 1.x and the
		// List<Node> returned by dom4j 2.x, without an unchecked assignment.
		List<?> nodes = doc.selectNodes(LINK_XPATH);
		Set<String> set = collectLinks(base, nodes);

		System.out.println("size="+set.size());
		for (String s : set) {
			System.out.println(s);
		}
	}

	/**
	 * Downloads {@code url} with an HTTP GET and decodes the body using {@code charset}.
	 * The connection is always released, even when reading fails.
	 *
	 * @param url     address to fetch
	 * @param charset name of the charset used to decode the response body
	 * @return the decoded response body
	 * @throws IOException if the request fails or the response carries no body
	 */
	private static String fetch(String url, String charset) throws IOException {
		HttpClient client = new HttpClient();
		GetMethod get = new GetMethod(url);
		try {
			client.executeMethod(get);
			InputStream body = get.getResponseBodyAsStream();
			if (body == null) {
				// Fail with a clear message instead of an NPE inside IOUtils.
				throw new IOException("no response body received from " + url);
			}
			return IOUtils.toString(body, charset);
		} finally {
			get.releaseConnection();
		}
	}

	/**
	 * Resolves the values of the given attribute nodes against {@code base} and
	 * returns the distinct absolute URLs. Values that are {@code javascript:} or
	 * {@code mailto:} links (any letter case) or pure fragments ({@code #...})
	 * are skipped, as are values that cannot be resolved to a valid URL.
	 *
	 * @param base  URL that relative links are resolved against
	 * @param nodes XPath selection result; entries that are not {@link Attribute}s are ignored
	 * @return the set of resolved URLs in external form
	 */
	private static Set<String> collectLinks(URL base, List<?> nodes) {
		Set<String> links = new HashSet<String>();
		for (Object node : nodes) {
			if (!(node instanceof Attribute)) {
				continue;
			}
			String raw = ((Attribute) node).getValue();
			if (raw == null) {
				continue;
			}
			String v = raw.trim();
			if (hasPrefixIgnoreCase(v, "javascript:")
					|| hasPrefixIgnoreCase(v, "mailto:")
					|| v.startsWith("#")) {
				continue;
			}
			try {
				links.add(new URL(base, v).toExternalForm());
			} catch (MalformedURLException e) {
				// One bad link on the page must not abort the whole report.
				System.err.println("skipping malformed link: " + v + " (" + e.getMessage() + ")");
			}
		}
		return links;
	}

	/**
	 * Tells whether {@code value} starts with {@code prefix}, ignoring letter case.
	 *
	 * @param value  string to inspect
	 * @param prefix prefix to look for
	 * @return {@code true} if {@code value} begins with {@code prefix} in any letter case
	 */
	private static boolean hasPrefixIgnoreCase(String value, String prefix) {
		return value.regionMatches(true, 0, prefix, 0, prefix.length());
	}

}
