package org.webharvest.runtime.processors;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.auth.NTLM;
import org.webharvest.definition.HttpDef;
import org.webharvest.definition.ScraperConfiguration;
import org.webharvest.exception.HttpException;
import org.webharvest.runtime.Scraper;
import org.webharvest.runtime.ScraperContext;
import org.webharvest.runtime.scripting.ScriptEngine;
import org.webharvest.runtime.templaters.BaseTemplater;
import org.webharvest.runtime.variables.NodeVariable;
import org.webharvest.runtime.variables.Variable;
import org.webharvest.runtime.web.HttpClientManager;
import org.webharvest.runtime.web.HttpParamInfo;
import org.webharvest.runtime.web.HttpResponseWrapper;
import org.webharvest.utils.CommonUtil;
import org.webharvest.utils.KeyValuePair;

/* loaded from: input_file:WEB-INF/lib/web-harvest-2.0-cl_20100531.jar:org/webharvest/runtime/processors/HttpProcessor.class */
/**
 * Processor that performs the HTTP request described by an {@link HttpDef} and
 * returns the response body as a {@link NodeVariable}.
 *
 * <p>Responses whose MIME type is absent or looks textual are decoded to a
 * {@code String}; every other response is returned as raw bytes. Request
 * parameters and headers are collected through {@link #addHttpParam} and
 * {@link #addHttpHeader} and handed to the scraper's {@link HttpClientManager}
 * when {@link #execute} runs.
 */
public class HttpProcessor extends BaseProcessor {
    private static final String HTML_META_CHARSET_REGEX = "(<meta\\s*http-equiv\\s*=\\s*(\"|')content-type(\"|')\\s*content\\s*=\\s*(\"|')text/html;\\s*charset\\s*=\\s*(.*?)(\"|')/?>)";

    /** Compiled once; the pattern is immutable and safe to reuse across calls. */
    private static final Pattern HTML_META_CHARSET_PATTERN =
            Pattern.compile(HTML_META_CHARSET_REGEX, Pattern.CASE_INSENSITIVE);

    /** Capturing group of {@link #HTML_META_CHARSET_REGEX} that holds the charset name. */
    private static final int META_CHARSET_GROUP = 5;

    /** Maximum number of leading body bytes scanned for a meta charset declaration. */
    private static final int META_SNIFF_LIMIT = 1024;

    private final HttpDef httpDef;

    /** Request parameters keyed by name, kept in insertion order. */
    private final Map<String, HttpParamInfo> httpParams = new LinkedHashMap<String, HttpParamInfo>();

    // NOTE(review): left as a raw Map because the parameter type expected by
    // HttpClientManager.execute is not visible from this file.
    private final Map httpHeaderMap = new HashMap();

    public HttpProcessor(HttpDef httpDef) {
        super(httpDef);
        this.httpDef = httpDef;
    }

    /**
     * Evaluates the request attributes, runs the element body, performs the HTTP
     * request and converts the response into a variable.
     *
     * <p>When no charset is given explicitly on the definition, the charset used
     * for decoding textual content starts as the configuration charset, is
     * replaced by the response charset if that one is supported, and is finally
     * replaced by a supported charset declared in an HTML meta tag (only for
     * {@code text/html} responses).
     *
     * @param scraper        scraper supplying the script engine, configuration,
     *                       logger and HTTP client manager
     * @param scraperContext context passed on to the body processor
     * @return a {@link NodeVariable} wrapping the decoded text for textual (or
     *         unknown) MIME types, otherwise wrapping the raw response bytes
     * @throws HttpException if the body cannot be decoded because the selected
     *                       charset is not supported
     */
    @Override // org.webharvest.runtime.processors.BaseProcessor
    public Variable execute(Scraper scraper, ScraperContext scraperContext) {
        String url;
        String method;
        boolean multipart;
        String explicitCharset;
        String charsetName;
        HttpResponseWrapper response;

        scraper.setRunningHttpProcessor(this);
        try {
            ScriptEngine scriptEngine = scraper.getScriptEngine();
            url = BaseTemplater.execute(this.httpDef.getUrl(), scriptEngine);
            method = BaseTemplater.execute(this.httpDef.getMethod(), scriptEngine);
            multipart = CommonUtil.getBooleanValue(BaseTemplater.execute(this.httpDef.getMultipart(), scriptEngine), false);
            explicitCharset = BaseTemplater.execute(this.httpDef.getCharset(), scriptEngine);
            String username = BaseTemplater.execute(this.httpDef.getUsername(), scriptEngine);
            String password = BaseTemplater.execute(this.httpDef.getPassword(), scriptEngine);
            String cookiePolicy = BaseTemplater.execute(this.httpDef.getCookiePolicy(), scriptEngine);

            // Fall back to the configuration-wide charset when none is set on the element.
            charsetName = explicitCharset != null ? explicitCharset : scraper.getConfiguration().getCharset();

            // The body runs before the request is sent; presumably this is where
            // nested elements call addHttpParam/addHttpHeader -- confirm against callers.
            new BodyProcessor(this.httpDef).execute(scraper, scraperContext);

            HttpClientManager httpClientManager = scraper.getHttpClientManager();
            httpClientManager.setCookiePolicy(cookiePolicy);
            response = httpClientManager.execute(method, multipart, url, charsetName, username, password, this.httpParams, this.httpHeaderMap);
        } finally {
            // Always undo the registration made above, even when template
            // evaluation, the body or the request itself throws.
            scraper.removeRunningHttpProcessor();
        }

        String mimeType = response.getMimeType();
        long contentLength = response.getContentLength();
        if (scraper.getLogger().isInfoEnabled()) {
            scraper.getLogger().info("Downloaded: " + url + ", mime type = " + mimeType + ", length = " + contentLength + "B.");
        }

        String responseCharset = response.getCharset();
        byte[] body = response.getBody();

        NodeVariable result;
        if (isTextual(mimeType)) {
            if (explicitCharset == null) {
                // An explicit charset always wins; only auto-detect without one.
                charsetName = detectCharset(charsetName, responseCharset, mimeType, body);
            }
            result = new NodeVariable(decode(body, charsetName));
        } else {
            result = new NodeVariable(body);
        }

        setProperty("URL", url);
        setProperty("Method", method);
        setProperty("Multipart", String.valueOf(multipart));
        setProperty("Charset", charsetName);
        setProperty("Content length", String.valueOf(contentLength));
        setProperty("Status code", Integer.valueOf(response.getStatusCode()));
        setProperty("Status text", response.getStatusText());
        KeyValuePair<String>[] headers = response.getHeaders();
        if (headers != null) {
            int position = 1;
            for (KeyValuePair<String> header : headers) {
                setProperty("HTTP header [" + position + "]: " + header.getKey(), header.getValue());
                position++;
            }
        }
        return result;
    }

    /**
     * Registers a request parameter; a later call with the same name replaces
     * the earlier entry.
     *
     * <p>The arguments are passed unchanged, in the same order, to the
     * {@link HttpParamInfo} constructor; {@code str} additionally serves as the
     * map key.
     */
    public void addHttpParam(String str, boolean z, String str2, String str3, Variable variable) {
        this.httpParams.put(str, new HttpParamInfo(str, z, str2, str3, variable));
    }

    /**
     * Registers a request header; a later call with the same name replaces the
     * earlier value.
     *
     * @param str  header name, used as the map key
     * @param str2 header value
     */
    public void addHttpHeader(String str, String str2) {
        this.httpHeaderMap.put(str, str2);
    }

    /** Tells whether a response with this MIME type is decoded to text; an unknown type counts as text. */
    private static boolean isTextual(String mimeType) {
        // NOTE(review): JAVASCRIPT_SCRIPT_ENGINE is used here as a MIME substring;
        // it looks like a decompiler constant substitution -- confirm its value.
        return mimeType == null
                || mimeType.contains("text")
                || mimeType.contains("xml")
                || mimeType.contains(ScraperConfiguration.JAVASCRIPT_SCRIPT_ENGINE);
    }

    /**
     * Picks the charset for decoding: the fallback, overridden by a supported
     * response charset, overridden in turn by a supported HTML meta charset.
     */
    private static String detectCharset(String fallback, String responseCharset, String mimeType, byte[] body) {
        String selected = fallback;
        if (isSupportedCharset(responseCharset)) {
            selected = responseCharset;
        }
        if ("text/html".equalsIgnoreCase(mimeType)) {
            String metaCharset = sniffMetaCharset(body);
            if (isSupportedCharset(metaCharset)) {
                selected = metaCharset;
            }
        }
        return selected;
    }

    /** Returns the charset named by a meta tag within the first bytes of the body, or {@code null} if none is found. */
    private static String sniffMetaCharset(byte[] body) {
        // NOTE(review): NTLM.DEFAULT_CHARSET is presumably an ASCII charset name
        // substituted by the decompiler for a string literal -- confirm.
        String head;
        try {
            head = new String(body, 0, Math.min(body.length, META_SNIFF_LIMIT), NTLM.DEFAULT_CHARSET);
        } catch (UnsupportedEncodingException e) {
            throw new HttpException("Charset " + NTLM.DEFAULT_CHARSET + " is not supported!", e);
        }
        Matcher matcher = HTML_META_CHARSET_PATTERN.matcher(head);
        // Trim so that stray whitespace before the closing quote does not
        // invalidate an otherwise legal charset name.
        return matcher.find() ? matcher.group(META_CHARSET_GROUP).trim() : null;
    }

    /**
     * Null-safe variant of {@link Charset#isSupported(String)} that reports a
     * syntactically illegal name as unsupported instead of throwing.
     */
    private static boolean isSupportedCharset(String name) {
        if (name == null) {
            return false;
        }
        try {
            return Charset.isSupported(name);
        } catch (IllegalCharsetNameException ignored) {
            // A malformed name from a header or meta tag simply means "cannot use it".
            return false;
        }
    }

    /** Decodes the body with the named charset, wrapping an unsupported charset in {@link HttpException}. */
    private static String decode(byte[] body, String charsetName) {
        try {
            return new String(body, charsetName);
        } catch (UnsupportedEncodingException e) {
            throw new HttpException("Charset " + charsetName + " is not supported!", e);
        }
    }
}
