package org.apache.tika.eval.core.util;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.apache.tika.exception.TikaException;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.ToTextContentHandler;
import org.apache.tika.utils.XMLReaderUtils;
import org.ccil.cowan.tagsoup.jaxp.SAXParserImpl;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/apache/tika/eval/core/util/ContentTagParser.class */
/**
 * Static helpers that feed a string through a SAX parser and return a
 * {@link ContentTags} built from the handler's string form plus a count of how
 * many times each element of interest was opened.
 *
 * <p>Element names are compared after upper-casing the qualified name with
 * {@link Locale#ENGLISH}, so the set of names handed to the parse methods must
 * already contain upper-case entries for anything to match.
 */
public class ContentTagParser {
    /** Shared parse context passed to {@code XMLReaderUtils.parseSAX}; nothing in this class adds entries to it. */
    private static final ParseContext EMPTY_PARSE_CONTEXT = new ParseContext();

    /**
     * Content handler that delegates every start-element event to
     * {@link ToTextContentHandler} and additionally tallies the elements whose
     * upper-cased qualified name is in the set of interest.
     */
    private static class XHTMLContentTagHandler extends ToTextContentHandler {
        /** Upper-cased qualified name to number of start-element events seen; supplied and owned by the caller. */
        private final Map<String, Integer> tags;
        /** Names to count; looked up with the upper-cased qualified name. */
        private final Set<String> uppercaseTagsOfInterest;

        /**
         * @param uppercaseTagsOfInterest names to count, expected in upper case
         * @param tags                    map that receives the counts; it is mutated in place
         */
        public XHTMLContentTagHandler(Set<String> uppercaseTagsOfInterest, Map<String, Integer> tags) {
            this.uppercaseTagsOfInterest = uppercaseTagsOfInterest;
            this.tags = tags;
        }

        /**
         * Forwards the event to the superclass, then increments the counter for
         * the element if its upper-cased qualified name is of interest.
         *
         * @throws SAXException if the superclass handler rejects the event
         */
        @Override
        public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
            super.startElement(uri, localName, qName, atts);
            // A null qualified name is treated as the empty string rather than causing a NullPointerException.
            String key = qName == null ? "" : qName.toUpperCase(Locale.ENGLISH);
            if (uppercaseTagsOfInterest.contains(key)) {
                Integer previous = tags.get(key);
                tags.put(key, previous == null ? 1 : previous + 1);
            }
        }
    }

    /**
     * Parses {@code str} with {@code XMLReaderUtils.parseSAX} and counts the
     * elements of interest.
     *
     * @param str the markup to parse; it is encoded as UTF-8 bytes before parsing
     * @param set upper-case element names to count
     * @return a {@link ContentTags} built from the handler's string form and the per-element counts
     * @throws TikaException propagated from {@code XMLReaderUtils.parseSAX}
     * @throws IOException   propagated from {@code XMLReaderUtils.parseSAX}
     * @throws SAXException  propagated from the parser or the handler
     */
    public static ContentTags parseXML(String str, Set<String> set) throws TikaException, IOException, SAXException {
        Map<String, Integer> tagCounts = new HashMap<>();
        XHTMLContentTagHandler handler = new XHTMLContentTagHandler(set, tagCounts);
        XMLReaderUtils.parseSAX(new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)), handler, EMPTY_PARSE_CONTEXT);
        return new ContentTags(handler.toString(), tagCounts);
    }

    /**
     * Parses {@code str} with TagSoup's JAXP parser ({@code SAXParserImpl}) and
     * counts the elements of interest.
     *
     * @param str the markup to parse; it is read directly as characters
     * @param set upper-case element names to count
     * @return a {@link ContentTags} built from the handler's string form and the per-element counts
     * @throws SAXException propagated from the parser or the handler
     * @throws IOException  propagated from the parser
     */
    public static ContentTags parseHTML(String str, Set<String> set) throws SAXException, IOException {
        Map<String, Integer> tagCounts = new HashMap<>();
        XHTMLContentTagHandler handler = new XHTMLContentTagHandler(set, tagCounts);
        // NOTE(review): null is passed as the only argument to newInstance, presumably meaning
        // "no extra parser features" - confirm against the TagSoup API.
        SAXParserImpl.newInstance(null).parse(new InputSource(new StringReader(str)), handler);
        return new ContentTags(handler.toString(), tagCounts);
    }
}
