package com.zhsoft88.commons;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.net.Socket;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.math.NumberUtils;

/**
 * Client for a seaflower crawler server.
 * <p>
 * An instance owns one TCP connection. {@link #crawl(SeaflowerConf)} writes a
 * request made of CRLF-terminated command lines followed by an empty line,
 * then reads a status line, a block of header lines, and a body whose size in
 * bytes is announced by a {@code Content-Length} header. All text is exchanged
 * as UTF-8.
 * <p>
 * The connection is closed after a crawl unless the configuration asks to
 * continue; call {@link #close()} to release a connection that was kept open.
 * Instances are not synchronized.
 *
 * @author zhsoft88
 * @since 2008-4-13
 * @update 2009-6-20
 */
public class Seaflower implements Closeable {

	/** Port used when the caller does not name one. */
	public static final int PORT = 4050;

	/** Charset used for both directions of the conversation. */
	private static final String CHARSET = "utf-8";
	/** Terminator of every request line. */
	private static final String LINE_END = "\r\n";
	/** Line that opens the request-header section embedded in the contents. */
	private static final String REQUEST_HEADERS_OPEN = "<!-- @REQUEST-HEADERS [";
	/** Line that opens the response-header section embedded in the contents. */
	private static final String RESPONSE_HEADERS_OPEN = "<!-- @RESPONSE-HEADERS [";
	/** Line that closes either embedded header section. */
	private static final String HEADERS_CLOSE = "] -->";
	/** Upper bound for the initial body buffer, so a bogus Content-Length cannot force a huge allocation. */
	private static final int MAX_INITIAL_BODY_BUFFER = 1 << 16;

	/**
	 * Outcome of one {@link Seaflower#crawl(SeaflowerConf)} call.
	 * Setters are protected: instances are filled in by the crawler.
	 *
	 * @author zhsoft88
	 * @since 2008-4-13
	 */
	public static class SeaflowerResult {
		private int status;
		private String title;
		private String location;
		private String contents;
		private long time;
		private List<String> requestHeaders;
		private List<String> responseHeaders;

		public SeaflowerResult() {
		}

		/** @return the number parsed from the second token of the status line */
		public int getStatus() {
			return status;
		}
		/** @return value of the {@code Current-Title} header, or {@code null} if it was not sent */
		public String getTitle() {
			return title;
		}
		/** @return value of the {@code Current-Location} header, or {@code null} if it was not sent */
		public String getLocation() {
			return location;
		}
		/** @return the response body decoded as UTF-8, without any extracted header sections */
		public String getContents() {
			return contents;
		}
		/** @return milliseconds spent between starting the request and finishing the body */
		public long getTime() {
			return time;
		}
		/** @return lines of the embedded request-header section; empty when none was extracted */
		public List<String> getRequestHeaders() {
			return requestHeaders;
		}
		/** @return lines of the embedded response-header section; empty when none was extracted */
		public List<String> getResponseHeaders() {
			return responseHeaders;
		}
		protected void setStatus(int status) {
			this.status = status;
		}
		protected void setTitle(String title) {
			this.title = title;
		}
		protected void setLocation(String location) {
			this.location = location;
		}
		protected void setContents(String contents) {
			this.contents = contents;
		}
		protected void setTime(long time) {
			this.time = time;
		}
		protected void setRequestHeaders(List<String> requestHeaders) {
			this.requestHeaders = requestHeaders;
		}
		protected void setResponseHeaders(List<String> responseHeaders) {
			this.responseHeaders = responseHeaders;
		}
		@Override
		public String toString() {
			return "status="+status+",location="+location+",title="+title+",time="+time+",request-headers="+requestHeaders+",response-headers="+responseHeaders+",contents=["+contents+"]";
		}
	}

	/**
	 * Options for one crawl. Each option that is set becomes one command line
	 * of the request; see {@link Seaflower#crawl(SeaflowerConf)} for the mapping.
	 *
	 * @author zhsoft88
	 * @since 2008-4-13
	 * @update 2008-12-16
	 */
	public static class SeaflowerConf {
		private String url;
		private String exec;
		// NOTE(review): -1 is the only value that suppresses the WAIT-TIME line,
		// but the field starts at 0, so "WAIT-TIME 0" is sent by default.
		// Presumably -1 was meant to be the default; confirm against the server.
		private int waitTime;
		private boolean cont;
		private boolean nodata;
		private List<String> httpHeaders;
		private boolean outputHttpHeaders;
		private boolean disableJavascript;

		public SeaflowerConf() {
		}

		/**
		 * Appends one header line, creating the list on first use.
		 * @param header text sent after {@code HTTP-HEADER }
		 */
		public void addHttpHeader(String header) {
			if (httpHeaders==null) {
				httpHeaders = new ArrayList<String>();
			}
			httpHeaders.add(header);
		}

		public String getUrl() {
			return url;
		}

		/** @param url sent as {@code GET url}; {@code null} omits the line */
		public void setUrl(String url) {
			this.url = url;
		}

		public String getExec() {
			return exec;
		}

		/** @param exec sent as {@code EXEC exec}; {@code null} omits the line */
		public void setExec(String exec) {
			this.exec = exec;
		}

		public int getWaitTime() {
			return waitTime;
		}

		/** @param waitTime sent as {@code WAIT-TIME waitTime}; {@code -1} omits the line */
		public void setWaitTime(int waitTime) {
			this.waitTime = waitTime;
		}

		/** @param cont {@code true} sends {@code CONTINUE} and keeps the connection open after the crawl */
		public void setContinue(boolean cont) {
			this.cont = cont;
		}

		public boolean isNodata() {
			return nodata;
		}

		/** @param nodata {@code true} sends {@code NODATA} */
		public void setNodata(boolean nodata) {
			this.nodata = nodata;
		}

		public boolean isContinue() {
			return cont;
		}

		/** @return the header list itself (not a copy), or {@code null} if none was added */
		public List<String> getHttpHeaders() {
			return httpHeaders;
		}

		public void setHttpHeaders(List<String> httpHeaders) {
			this.httpHeaders = httpHeaders;
		}

		public boolean isOutputHttpHeaders() {
			return outputHttpHeaders;
		}

		/**
		 * @param outputHttpHeaders {@code true} sends {@code OUTPUT http-headers} and makes the
		 *        crawler extract the embedded header sections from the returned contents
		 */
		public void setOutputHttpHeaders(boolean outputHttpHeaders) {
			this.outputHttpHeaders = outputHttpHeaders;
		}

		public boolean isDisableJavascript() {
			return disableJavascript;
		}

		/** @param disableJavascript {@code true} sends {@code DISABLE javascript} */
		public void setDisableJavascript(boolean disableJavascript) {
			this.disableJavascript = disableJavascript;
		}

	}

	/** Connection to the server; {@code null} once it has been closed. */
	private Socket socket;

	/**
	 * Connects to {@code localhost} on {@link #PORT}.
	 * @throws UnknownHostException if the host cannot be resolved
	 * @throws IOException if the connection cannot be established
	 */
	public Seaflower() throws UnknownHostException, IOException {
		this("localhost");
	}

	/**
	 * Connects to {@code host} on {@link #PORT}.
	 * @param host server host name or address
	 * @throws UnknownHostException if the host cannot be resolved
	 * @throws IOException if the connection cannot be established
	 */
	public Seaflower(String host) throws UnknownHostException, IOException {
		this(host,PORT);
	}

	/**
	 * Connects to {@code host} on {@code port}.
	 * @param host server host name or address
	 * @param port server TCP port
	 * @throws UnknownHostException if the host cannot be resolved
	 * @throws IOException if the connection cannot be established
	 */
	public Seaflower(String host,int port) throws UnknownHostException, IOException {
		socket = new Socket(host,port);
	}

	/**
	 * Closes the connection if it is still open. Safe to call repeatedly;
	 * afterwards {@link #crawl(SeaflowerConf)} fails with "socket closed".
	 * @throws IOException if closing the socket fails
	 */
	public void close() throws IOException {
		Socket open = socket;
		socket = null;
		if (open!=null) {
			open.close();
		}
	}

	/** Closes the connection on a failure path without masking the failure being reported. */
	private void closeQuietly() {
		try {
			close();
		} catch (IOException ignored) {
			// the exception that triggered the cleanup is the one the caller needs
		}
	}

	/**
	 * Reads bytes up to the next LF (or end of stream) and decodes them as UTF-8.
	 * Every CR is dropped. End of stream is indistinguishable from an empty line:
	 * both yield {@code ""}; the method never returns {@code null}.
	 */
	private String readUTFLine(InputStream in) throws IOException {
		ByteArrayOutputStream baos = new ByteArrayOutputStream();
		int c;
		while ((c=in.read())!=-1) {
			if (c=='\r') continue;
			if (c=='\n') break;
			baos.write(c);
		}
		return baos.toString(CHARSET);
	}

	/** Parses {@code text} (surrounding whitespace ignored) as an int, or returns {@code fallback} if it is not one. */
	private static int toInt(String text,int fallback) {
		if (text==null) {
			return fallback;
		}
		try {
			return Integer.parseInt(text.trim());
		} catch (NumberFormatException e) {
			return fallback;
		}
	}

	/** Appends {@code command argument CRLF}, refusing arguments that would span several protocol lines. */
	private static void appendCommand(StringBuilder request,String command,String argument) {
		if (argument.indexOf('\r')!=-1 || argument.indexOf('\n')!=-1) {
			// an embedded line break would let the value smuggle in further commands
			throw new IllegalArgumentException(command+" argument must not contain CR or LF");
		}
		request.append(command).append(' ').append(argument).append(LINE_END);
	}

	/** Renders the whole request, including the terminating empty line, without performing any I/O. */
	private static String buildRequest(SeaflowerConf conf) {
		StringBuilder request = new StringBuilder();
		if (conf.getUrl()!=null) {
			appendCommand(request,"GET",conf.getUrl());
		}
		if (conf.getExec()!=null) {
			appendCommand(request,"EXEC",conf.getExec());
		}
		if (conf.getWaitTime()!=-1) {
			request.append("WAIT-TIME ").append(conf.getWaitTime()).append(LINE_END);
		}
		if (conf.getHttpHeaders()!=null) {
			for (String header : conf.getHttpHeaders()) {
				if (header!=null) { // a null entry would otherwise go out as the literal text "null"
					appendCommand(request,"HTTP-HEADER",header);
				}
			}
		}
		if (conf.isOutputHttpHeaders()) {
			request.append("OUTPUT http-headers").append(LINE_END);
		}
		if (conf.isDisableJavascript()) {
			request.append("DISABLE javascript").append(LINE_END);
		}
		if (conf.isContinue()) {
			request.append("CONTINUE").append(LINE_END);
		}
		if (conf.isNodata()) {
			request.append("NODATA").append(LINE_END);
		}
		request.append(LINE_END);
		return request.toString();
	}

	/**
	 * Extracts the status number from the status line: the first space-separated
	 * token is skipped and the second is parsed, yielding 0 if it is not numeric.
	 * @throws IOException if the line has fewer than two tokens, which includes
	 *         the empty line produced when the peer closed the connection
	 */
	private static int parseStatus(String statusLine) throws IOException {
		StringTokenizer st = new StringTokenizer(statusLine," ");
		if (st.countTokens()<2) {
			throw new IOException("malformed status line: ["+statusLine+"]");
		}
		st.nextToken();
		return toInt(st.nextToken(),0);
	}

	/**
	 * Reads exactly {@code length} bytes and decodes them as UTF-8.
	 * Never consumes more than {@code length} bytes, so a following response on
	 * a kept-open connection stays in the stream. A non-positive length reads nothing.
	 * @throws EOFException if the stream ends before {@code length} bytes arrived
	 */
	private static String readBody(InputStream in,int length) throws IOException {
		int remaining = Math.max(length,0);
		ByteArrayOutputStream body = new ByteArrayOutputStream(Math.min(remaining,MAX_INITIAL_BODY_BUFFER));
		byte[] chunk = new byte[4096];
		while (remaining>0) {
			int n = in.read(chunk,0,Math.min(chunk.length,remaining));
			if (n==-1) {
				throw new EOFException("connection closed with "+remaining+" of "+length+" body bytes missing");
			}
			body.write(chunk,0,n);
			remaining -= n;
		}
		return body.toString(CHARSET);
	}

	/** Consumes lines until one equals {@code marker}; returns whether it was found before the end. */
	private static boolean skipPast(BufferedReader reader,String marker) throws IOException {
		String line;
		while ((line=reader.readLine())!=null) {
			if (line.equals(marker)) {
				return true;
			}
		}
		return false;
	}

	/** Collects lines into {@code target} until the closing marker line or the end of input. */
	private static void readSection(BufferedReader reader,List<String> target) throws IOException {
		String line;
		while ((line=reader.readLine())!=null) {
			if (line.equals(HEADERS_CLOSE)) {
				break;
			}
			target.add(line);
		}
	}

	/** Returns everything that is left in {@code reader}. */
	private static String drain(BufferedReader reader) throws IOException {
		StringBuilder rest = new StringBuilder();
		char[] chunk = new char[4096];
		int n;
		while ((n=reader.read(chunk))!=-1) {
			rest.append(chunk,0,n);
		}
		return rest.toString();
	}

	/**
	 * Moves the embedded header sections out of {@code contents} into the given lists
	 * and returns what follows them. Text in front of the request-header marker is dropped.
	 * A missing section never costs page text: without a request-header marker the contents
	 * are returned untouched, and without a response-header marker everything after the
	 * request section is returned.
	 */
	private static String extractHeaders(String contents,List<String> requestHeaders,List<String> responseHeaders) throws IOException {
		BufferedReader reader = new BufferedReader(new StringReader(contents));
		if (!skipPast(reader,REQUEST_HEADERS_OPEN)) {
			return contents;
		}
		readSection(reader,requestHeaders);
		// remember this position: searching for the second marker may run to the end of the input
		reader.mark(contents.length()+1);
		if (skipPast(reader,RESPONSE_HEADERS_OPEN)) {
			readSection(reader,responseHeaders);
		} else {
			reader.reset();
		}
		return drain(reader);
	}

	/**
	 * Sends one request described by {@code conf} and reads the response.
	 * <p>
	 * Request lines, in this order and only for options that are set:
	 * {@code GET}, {@code EXEC}, {@code WAIT-TIME}, one {@code HTTP-HEADER} per header,
	 * {@code OUTPUT http-headers}, {@code DISABLE javascript}, {@code CONTINUE},
	 * {@code NODATA}, then an empty line. From the response, the headers
	 * {@code Current-Title}, {@code Current-Location} and {@code Content-Length}
	 * are interpreted; other header lines are ignored.
	 * <p>
	 * The connection is closed when the crawl completes without
	 * {@link SeaflowerConf#isContinue()} and also whenever the exchange fails,
	 * because a half-read response leaves the stream unusable.
	 *
	 * @param conf options of this crawl; must not be {@code null}
	 * @return the parsed response
	 * @throws IllegalArgumentException if the url, exec or a header contains CR or LF;
	 *         nothing has been sent and the connection stays usable
	 * @throws EOFException if the connection ends before the announced body was received
	 * @throws IOException on a malformed status line or any other I/O failure
	 * @throws Exception with message "socket closed" if the connection is already closed
	 */
	public SeaflowerResult crawl(SeaflowerConf conf) throws Exception {
		if (socket==null) {
			throw new Exception("socket closed");
		}
		if (conf==null) {
			throw new NullPointerException("conf");
		}
		long t1 = System.currentTimeMillis();
		// built (and validated) up front so that a rejected value never leaves a partial request on the wire
		String request = buildRequest(conf);
		boolean completed = false;
		try {
			BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(socket.getOutputStream(),CHARSET));
			bw.write(request);
			bw.flush();

			InputStream in = socket.getInputStream();
			int status = parseStatus(readUTFLine(in));
			String tagTitle = "Current-Title: ";
			String tagLocation = "Current-Location: ";
			String tagLength = "Content-Length: ";
			String title = null;
			String location = null;
			int length = 0;
			String line;
			// an empty line ends the header block (end of stream reads as an empty line too)
			while ((line=readUTFLine(in)).length()!=0) {
				if (line.startsWith(tagTitle)) {
					title = line.substring(tagTitle.length());
				} else if (line.startsWith(tagLocation)) {
					location = line.substring(tagLocation.length());
				} else if (line.startsWith(tagLength)) {
					length = toInt(line.substring(tagLength.length()),0);
				}
			}
			String contents = readBody(in,length);
			if (!conf.isContinue()) {
				close();
			}
			long t2 = System.currentTimeMillis();

			List<String> requestHeaders = new ArrayList<String>();
			List<String> responseHeaders = new ArrayList<String>();
			if (conf.isOutputHttpHeaders()) {
				contents = extractHeaders(contents,requestHeaders,responseHeaders);
			}
			SeaflowerResult result = new SeaflowerResult();
			result.setStatus(status);
			result.setTitle(title);
			result.setLocation(location);
			result.setTime(t2-t1);
			result.setContents(contents);
			result.setRequestHeaders(requestHeaders);
			result.setResponseHeaders(responseHeaders);
			completed = true;
			return result;
		} finally {
			if (!completed) {
				closeQuietly();
			}
		}
	}

}
