package com.chimbori.crux.articles;

import com.chimbori.crux.common.Log;
import com.chimbori.crux.common.StringUtils;
import com.huawei.hms.feature.dynamic.e.a;
import com.huawei.hms.feature.dynamic.e.b;
import com.shawnyang.jpreader_lib.data.room.model.Book;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.jsoup.nodes.Attribute;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;

/* loaded from: classes.dex */
/**
 * Cleans up an already-selected DOM subtree and copies what survives into a fresh
 * {@link Document}.
 *
 * <p>The only entry point is {@link #postprocess(Element)}. It runs a fixed sequence of
 * in-place passes over the given element (so the caller's tree is mutated) and then clones the
 * remaining child nodes into the returned document.
 *
 * <p>Elements matching the selector {@code [crux-keep]}, together with their ancestors (up to
 * but excluding the top node) and all of their descendants, are recorded by identity in
 * {@link #keepers} and are never deleted by the removal passes. They are still subject to
 * re-tagging and attribute stripping.
 */
class PostprocessHelpers {
    /** Elements or paragraphs whose text is shorter than this many characters are removal candidates. */
    private static final int MIN_LENGTH_FOR_PARAGRAPHS = 50;

    /** Matched against the {@code style} and {@code class} attribute values to spot hidden elements. */
    private static final Pattern UNLIKELY_CSS_STYLES = Pattern.compile("display\\:none|visibility\\:hidden");

    /** Tags that are renamed to {@code <p>} so that their content is kept. */
    private static final Set<String> REMOVE_TAGS_BUT_RETAIN_CONTENT = new HashSet<>(Arrays.asList("font", "table", "tbody", "tr", "td", "div", "ol", "ul", "li", "span"));

    /**
     * Tags allowed to remain at any depth.
     *
     * NOTE(review): {@code b.a} and {@code a.a} come from an unrelated package and look like a
     * decompiler substituting constants with equal values; presumably they are the tag names
     * "b" and "a" -- confirm and inline the literals.
     */
    private static final Set<String> RETAIN_TAGS = new HashSet<>(Arrays.asList("p", b.a, "i", "u", "strong", "em", a.a, "pre", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote"));

    /** Tags that are not subject to the minimum text length in {@link #removeShortParagraphs}. */
    private static final Set<String> TAGS_EXEMPT_FROM_MIN_LENGTH_CHECK = new HashSet<>(Arrays.asList(b.a, "i", "u", "strong", "em", a.a, "pre", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote"));

    /**
     * Attribute names that survive {@link #removeDisallowedAttributes}.
     *
     * NOTE(review): {@code Book.HREF} belongs to an unrelated model class; presumably its value
     * is "href" -- confirm and inline the literal.
     */
    private static final Set<String> ATTRIBUTES_TO_RETAIN_IN_HTML = new HashSet<>(Arrays.asList(Book.HREF));

    /** Tags allowed as direct children of the top node. */
    private static final Set<String> RETAIN_TAGS_TOP_LEVEL = new HashSet<>(Arrays.asList("p", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "li"));

    /** Nodes (compared by identity) that the removal passes must leave in place. */
    private final Set<Node> keepers;

    private PostprocessHelpers(Set<Node> set) {
        this.keepers = set;
    }

    /**
     * Collects {@code keeper}, its ancestors below {@code topNode}, and every descendant of
     * {@code keeper} (breadth-first).
     *
     * @param topNode the root at which the upward walk stops; it is not included in the result
     * @param keeper  the element to protect
     * @return the collected nodes, ancestors-and-self first, then descendants
     */
    private static Collection<Node> getAncestorsSelfAndDescendants(Element topNode, Element keeper) {
        Collection<Node> collected = new ArrayList<>();
        // Walk upwards; the null check covers a keeper that is not actually below topNode.
        for (Node ancestor = keeper; ancestor != topNode && ancestor != null; ancestor = ancestor.parentNode()) {
            collected.add(ancestor);
        }
        ArrayDeque<Node> pending = new ArrayDeque<>(keeper.childNodes());
        while (!pending.isEmpty()) {
            Node current = pending.poll();
            collected.add(current);
            for (Node child : current.childNodes()) {
                pending.offer(child);
            }
        }
        return collected;
    }

    /**
     * Reports whether an element looks like non-article content: its {@code class} contains
     * "caption" (case-insensitively), or its {@code style} or {@code class} value matches
     * {@link #UNLIKELY_CSS_STYLES}.
     */
    private boolean isUnlikely(Element element) {
        String styleAttribute = element.attr("style");
        String classAttribute = element.attr("class");
        // Locale.ROOT keeps the match independent of the device locale (e.g. Turkish dotless i).
        if (classAttribute != null && classAttribute.toLowerCase(Locale.ROOT).contains("caption")) {
            return true;
        }
        if (UNLIKELY_CSS_STYLES.matcher(styleAttribute).find()) {
            return true;
        }
        return classAttribute != null && UNLIKELY_CSS_STYLES.matcher(classAttribute).find();
    }

    /**
     * Runs all clean-up passes on {@code topNode} and returns a new document holding clones of
     * the child nodes that remain.
     *
     * @param topNode the subtree to clean; modified in place. May be {@code null}.
     * @return a new {@link Document}; empty when {@code topNode} is {@code null}
     */
    public static Document postprocess(Element topNode) {
        Log.i("postprocess");
        Document document = new Document("");
        if (topNode == null) {
            return document;
        }

        // Identity semantics: a node is protected only if it is the very same object.
        Set<Node> protectedNodes = Collections.newSetFromMap(new IdentityHashMap<Node, Boolean>());
        for (Element keeper : topNode.select("[crux-keep]")) {
            protectedNodes.addAll(getAncestorsSelfAndDescendants(topNode, keeper));
        }

        PostprocessHelpers helpers = new PostprocessHelpers(protectedNodes);
        helpers.removeNodesWithNegativeScores(topNode);
        helpers.replaceLineBreaksWithSpaces(topNode);
        helpers.removeUnlikelyChildNodes(topNode);
        helpers.removeTagsButRetainContent(topNode);
        helpers.removeTagsNotLikelyToBeParagraphs(topNode);
        helpers.removeTopLevelTagsNotLikelyToBeParagraphs(topNode);
        helpers.removeShortParagraphs(topNode);
        helpers.removeDisallowedAttributes(topNode);

        // Clone rather than move, so the source tree is not altered while it is being iterated.
        for (Node child : topNode.childNodes()) {
            document.appendChild(child.clone());
        }
        return document;
    }

    /**
     * Recursively strips every attribute whose key is not in
     * {@link #ATTRIBUTES_TO_RETAIN_IN_HTML}, children first, then {@code element} itself.
     */
    private void removeDisallowedAttributes(Element element) {
        for (Element child : element.children()) {
            removeDisallowedAttributes(child);
        }
        // Collect first, remove afterwards: the attribute set must not change while iterated.
        Collection<String> keysToRemove = new ArrayList<>();
        for (Attribute attribute : element.attributes()) {
            if (!ATTRIBUTES_TO_RETAIN_IN_HTML.contains(attribute.getKey())) {
                keysToRemove.add(attribute.getKey());
            }
        }
        for (String key : keysToRemove) {
            element.removeAttr(key);
        }
    }

    /**
     * Removes scored elements whose score is negative or whose text is shorter than
     * {@link #MIN_LENGTH_FOR_PARAGRAPHS}, unless they are protected.
     *
     * <p>{@link Integer#parseInt(String)} throws {@link NumberFormatException} when the score
     * attribute is not a valid integer.
     */
    private void removeNodesWithNegativeScores(Element topNode) {
        for (Element scored : topNode.select(ExtractionHelpers.GRAVITY_SCORE_SELECTOR)) {
            int score = Integer.parseInt(scored.attr(ExtractionHelpers.GRAVITY_SCORE_ATTRIBUTE));
            boolean isCandidate = score < 0 || scored.text().length() < MIN_LENGTH_FOR_PARAGRAPHS;
            if (isCandidate && !shouldKeep(scored)) {
                Log.printAndRemove(scored, "removeNodesWithNegativeScores");
            }
        }
    }

    /**
     * Removes direct child nodes of {@code topNode} that carry too little text.
     *
     * <p>A child is removed (unless protected) when any of the following holds:
     * <ul>
     *   <li>it is neither a {@link TextNode} nor an {@link Element};</li>
     *   <li>its trimmed text is empty;</li>
     *   <li>its tag is not in {@link #TAGS_EXEMPT_FROM_MIN_LENGTH_CHECK} and its trimmed text is
     *       shorter than {@link #MIN_LENGTH_FOR_PARAGRAPHS} (text nodes are never exempt);</li>
     *   <li>its trimmed text is more than twice as long as
     *       {@code StringUtils.countLetters(text)} (presumably the number of letter
     *       characters -- confirm against {@code StringUtils}).</li>
     * </ul>
     */
    private void removeShortParagraphs(Element topNode) {
        // Iterate backwards so that removing a child does not shift the indices still to visit.
        for (int index = topNode.childNodeSize() - 1; index >= 0; index--) {
            Node childNode = topNode.childNode(index);

            String text = null;
            boolean isExemptFromMinTextLengthCheck = false;
            if (childNode instanceof TextNode) {
                text = ((TextNode) childNode).text().trim();
            } else if (childNode instanceof Element) {
                Element childElement = (Element) childNode;
                text = childElement.text().trim();
                isExemptFromMinTextLengthCheck = TAGS_EXEMPT_FROM_MIN_LENGTH_CHECK.contains(childElement.tagName());
            }

            Log.i("removeShortParagraphs: [%s] isExemptFromMinTextLengthCheck : %b", childNode, isExemptFromMinTextLengthCheck);

            boolean isTooShort = text == null
                    || text.isEmpty()
                    || (!isExemptFromMinTextLengthCheck && text.length() < MIN_LENGTH_FOR_PARAGRAPHS)
                    || text.length() > StringUtils.countLetters(text) * 2;
            if (isTooShort && !shouldKeep(childNode)) {
                Log.printAndRemove(childNode, "removeShortParagraphs:");
            }
        }
    }

    /**
     * Recursively renames every descendant whose tag is in
     * {@link #REMOVE_TAGS_BUT_RETAIN_CONTENT} to {@code <p>}. Despite the method name, the
     * wrapper element is kept (re-tagged) rather than unwrapped.
     */
    private void removeTagsButRetainContent(Element element) {
        for (Element child : element.children()) {
            // Depth-first: descendants are re-tagged before the child itself.
            removeTagsButRetainContent(child);
            if (REMOVE_TAGS_BUT_RETAIN_CONTENT.contains(child.tagName())) {
                Log.i("removeTagsButRetainContent: [%s] %s", child.tagName(), child.outerHtml());
                child.tagName("p");
            }
        }
    }

    /**
     * Recursively removes descendants whose tag is not in {@link #RETAIN_TAGS}, unless they are
     * protected. Retained elements are descended into; removed or protected ones are not.
     */
    private void removeTagsNotLikelyToBeParagraphs(Element element) {
        for (Element child : element.children()) {
            if (RETAIN_TAGS.contains(child.tagName())) {
                if (!child.children().isEmpty()) {
                    removeTagsNotLikelyToBeParagraphs(child);
                }
            } else if (!shouldKeep(child)) {
                Log.printAndRemove(child, "removeTagsNotLikelyToBeParagraphs");
            }
        }
    }

    /**
     * Removes direct children of {@code element} whose tag is not in
     * {@link #RETAIN_TAGS_TOP_LEVEL}, unless they are protected. Not recursive.
     */
    private void removeTopLevelTagsNotLikelyToBeParagraphs(Element element) {
        for (Element child : element.children()) {
            boolean isAllowedAtTopLevel = RETAIN_TAGS_TOP_LEVEL.contains(child.tagName());
            if (!isAllowedAtTopLevel && !shouldKeep(child)) {
                Log.printAndRemove(child, "removeTopLevelTagsNotLikelyToBeParagraphs");
            }
        }
    }

    /**
     * Recursively removes descendants flagged by {@link #isUnlikely(Element)}, unless they are
     * protected. Elements that are not flagged are descended into; flagged ones are not, even
     * when they are kept.
     */
    private void removeUnlikelyChildNodes(Element element) {
        for (Element child : element.children()) {
            if (isUnlikely(child)) {
                if (!shouldKeep(child)) {
                    Log.printAndRemove(child, "removeUnlikelyChildNodes");
                }
            } else if (!child.children().isEmpty()) {
                removeUnlikelyChildNodes(child);
            }
        }
    }

    /**
     * Collapses adjacent {@code <br>} elements and replaces each remaining {@code <br>} with the
     * separator {@code " • "} (a bullet, not a plain space as the method name suggests).
     */
    private void replaceLineBreaksWithSpaces(Element topNode) {
        // Drop every <br> that immediately follows another <br>.
        for (Element repeatedBreak : topNode.select("br + br")) {
            repeatedBreak.remove();
        }
        for (Element lineBreak : topNode.select("br")) {
            Node previous = lineBreak.previousSibling();
            if (previous != null) {
                previous.after(" • ");
            } else {
                // No previous sibling to anchor on: the separator goes at the end of the parent.
                lineBreak.parent().append(" • ");
            }
            lineBreak.unwrap();
        }
    }

    /** Returns whether {@code node} is one of the protected nodes (identity comparison). */
    private boolean shouldKeep(Node node) {
        return this.keepers.contains(node);
    }
}
