package org.mdolidon.hamster.core;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/* loaded from: input_file:org/mdolidon/hamster/core/HTMLProcessingWorker.class */
/**
 * Worker that repeatedly asks the mediator for HTML content, extracts the links
 * found in {@code href} and {@code src} attributes, rewrites those attributes in
 * the document, and hands the results back to the mediator.
 *
 * <p>The worker loops until an {@link InterruptedException} is raised, at which
 * point it restores the thread's interrupt status and returns.
 */
public class HTMLProcessingWorker implements Runnable {
    /** URL scheme prefixes whose links are left untouched and never queued. */
    private static final String[] ignore_ProtocolPrefixes = {"mailto:", "telnet:", "nntp:", "gopher:", "news:", "file:", "ftp:", "chrome:"};
    private static final Logger logger = LogManager.getLogger();
    private final IMediator mediator;
    private final IConfiguration configuration;

    /**
     * Signals that a piece of HTML content could not be processed. The message is
     * forwarded to the mediator by {@link #run()}.
     */
    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/mdolidon/hamster/core/HTMLProcessingWorker$ProcessingException.class */
    public class ProcessingException extends Exception {
        private static final long serialVersionUID = 1;

        ProcessingException(String str) {
            super(str);
        }

        /** Same as the single-argument form, but keeps the underlying cause for diagnostics. */
        ProcessingException(String str, Throwable cause) {
            super(str, cause);
        }
    }

    /**
     * @param iMediator      source of content to process and sink for results and errors
     * @param iConfiguration configuration passed to every {@link Link} created by this worker
     */
    public HTMLProcessingWorker(IMediator iMediator, IConfiguration iConfiguration) {
        this.mediator = iMediator;
        this.configuration = iConfiguration;
    }

    /**
     * Main loop: fetch content from the mediator, process it, and report either the
     * discovered links plus the processed content, or a processing error.
     * Terminates when interrupted.
     */
    @Override // java.lang.Runnable
    public void run() {
        logger.trace("Starting a processing worker");
        while (true) {
            try {
                Content current = this.mediator.provideHTMLContentToProcess();
                try {
                    List<Link> newLinks = processCurrentContent(current);
                    if (!newLinks.isEmpty()) {
                        this.mediator.acceptAllNewLinks(newLinks);
                    }
                    this.mediator.acceptProcessedHTMLContent(current);
                } catch (ProcessingException e) {
                    this.mediator.acceptHTMLProcessingError(current, e.getMessage());
                }
            } catch (InterruptedException e) {
                // Restore the interrupt status so whoever owns this thread can still observe it.
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /**
     * Parses the content as HTML, rewrites every {@code href}/{@code src} attribute,
     * stores the re-serialized document back into {@code content}, and returns the
     * links that need to be downloaded.
     *
     * @param content the HTML content to process; its bytes are replaced on success
     * @return the links for which {@code needsDownload()} returned true, possibly empty
     * @throws ProcessingException  if the HTML cannot be parsed or its {@code <base>} element is unusable
     * @throws InterruptedException propagated from link processing
     */
    public List<Link> processCurrentContent(Content content) throws ProcessingException, InterruptedException {
        URL effectiveLocation = content.getEffectiveLocation();
        if (effectiveLocation == null) {
            logger.warn("No effective location was set for {} ; using source link URL instead. May be unreliable.", content.getSourceLink());
            effectiveLocation = content.getSourceLink().getTarget();
        }
        logger.trace("HTML Processing worker got document for {}", effectiveLocation);
        ByteArrayInputStream input = new ByteArrayInputStream(content.getBytes());
        List<Link> discovered = new ArrayList<>(20);
        try {
            // A null charset lets jsoup detect the encoding itself.
            Document document = Jsoup.parse(input, (String) null, effectiveLocation.toString());
            URL baseUrl = getNewBaseUrl_IfBaseElement(effectiveLocation, document);
            for (Element element : selectLinkElements(document)) {
                processLinkElement(element, content.getSourceLink(), baseUrl, discovered);
            }
            content.setBytes(document.outerHtml().getBytes(document.outputSettings().charset()));
            return discovered;
        } catch (IOException e) {
            throw new ProcessingException("Error while trying to parse HTML content", e);
        }
    }

    /** Selects every element that carries an {@code href} or a {@code src} attribute. */
    private Elements selectLinkElements(Document document) {
        return document.select("[href],[src]");
    }

    /**
     * Processes the element's {@code href} attribute if present, otherwise its
     * {@code src} attribute. When both are present only {@code href} is handled.
     */
    private void processLinkElement(Element element, Link link, URL url, List<Link> list) throws InterruptedException {
        if (element.hasAttr("href")) {
            processLinkElement(element, "href", link, url, list);
        } else if (element.hasAttr("src")) {
            processLinkElement(element, "src", link, url, list);
        }
    }

    /**
     * Resolves one attribute of one element into a {@link Link}, rewrites the
     * attribute in place, and appends the link to {@code list} if it needs download.
     *
     * @param element the element holding the link
     * @param str     the attribute name, {@code "href"} or {@code "src"}
     * @param link    the link of the document being processed
     * @param url     the base URL against which the attribute value is resolved
     * @param list    accumulator for links that need to be downloaded
     */
    private void processLinkElement(Element element, String str, Link link, URL url, List<Link> list) throws InterruptedException {
        // Compare tag names by content; '==' only tests reference identity.
        if ("base".equals(element.tagName())) {
            return;
        }
        String attr = element.attr(str);
        if (is_ignoredProtocolPrefix(attr)) {
            return;
        }
        try {
            Link target = new Link(new URL(url, attr), link.getJumpsFromStartingURL() + 1, this.configuration);
            if ("img".equals(element.tagName())) {
                target.setFlag_image();
            }
            // The link is evaluated while bound to its source element, then unbound again.
            target.bindSourceElement(element);
            boolean isPartOfTargetSet = target.isPartOfTargetSet();
            boolean needsDownload = target.needsDownload();
            target.unbindSourceElement();
            String rewritten;
            if (isPartOfTargetSet) {
                // Point at the local copy, relative to the current document's storage file.
                rewritten = Utils.getRelativeHref(link.getStorageFile(), target.getStorageFile()) + target.getUrlHash();
            } else {
                rewritten = target.getTargetAsString();
            }
            element.attr(str, rewritten);
            if (needsDownload) {
                list.add(target);
            }
        } catch (MalformedURLException e) {
            logger.info("Ignoring malformed URL : {}, found in {}", attr, link.getTargetAsString());
        }
    }

    /**
     * Determines the base URL for resolving links in the document.
     *
     * @param url      the document's own location, used when no usable {@code <base>} exists
     * @param document the parsed document
     * @return {@code url} if there is no {@code <base>} element or it has no {@code href};
     *         otherwise the {@code href} resolved against {@code url}
     * @throws ProcessingException if there are several {@code <base>} elements or the
     *                             {@code href} is malformed
     */
    private URL getNewBaseUrl_IfBaseElement(URL url, Document document) throws ProcessingException {
        Elements baseElements = document.select("base");
        if (baseElements.isEmpty()) {
            return url;
        }
        if (baseElements.size() > 1) {
            throw new ProcessingException("Several <base> elements in the document. This is invalid HTML, and I don't know which one to use.");
        }
        Element base = baseElements.first();
        if (!base.hasAttr("href")) {
            return url;
        }
        try {
            return new URL(url, base.attr("href"));
        } catch (MalformedURLException e) {
            throw new ProcessingException("<base> element has a malformed URL : " + base.attr("href"), e);
        }
    }

    /**
     * Tells whether the given attribute value must be skipped: either it is
     * {@code null} or it starts with one of the ignored scheme prefixes.
     * Matching is case-insensitive because URI schemes are case-insensitive.
     */
    private boolean is_ignoredProtocolPrefix(String str) {
        if (str == null) {
            return true;
        }
        for (String prefix : ignore_ProtocolPrefixes) {
            if (str.regionMatches(true, 0, prefix, 0, prefix.length())) {
                return true;
            }
        }
        return false;
    }
}
