package org.apache.tika.batch.fs.builders;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.regex.Pattern;
import opennlp.tools.ml.model.AbstractDataIndexer;
import org.apache.tika.batch.FileResource;
import org.apache.tika.batch.FileResourceCrawler;
import org.apache.tika.batch.builders.BatchProcessBuilder;
import org.apache.tika.batch.builders.ICrawlerBuilder;
import org.apache.tika.batch.fs.FSDirectoryCrawler;
import org.apache.tika.batch.fs.FSDocumentSelector;
import org.apache.tika.batch.fs.FSListCrawler;
import org.apache.tika.extractor.DocumentSelector;
import org.apache.tika.util.PropsUtil;
import org.apache.tika.util.XMLDOMUtil;
import org.w3c.dom.Node;

/* loaded from: input_file:org/apache/tika/batch/fs/builders/FSCrawlerBuilder.class */
/**
 * Builds a {@link FileResourceCrawler} that reads from the file system, configured from the
 * attributes of a DOM node merged with runtime attributes.
 *
 * <p>If the merged attributes contain {@code fileList}, an {@link FSListCrawler} is created;
 * otherwise an {@link FSDirectoryCrawler} is created, optionally starting at {@code startDir}.
 * In both cases the crawler is then given the {@code maxFilesToConsider}, {@code maxFilesToAdd}
 * and {@code maxConsecWaitMillis} limits and an {@link FSDocumentSelector} built from the
 * include/exclude patterns and min/max file sizes.
 *
 * <p>No explicit bridge overload of {@code build} is declared here: a hand-written method with
 * the same erasure clashes with the generic one, and the compiler emits the bridge itself.
 */
public class FSCrawlerBuilder implements ICrawlerBuilder {
    private static final String MAX_CONSEC_WAIT_MILLIS = "maxConsecWaitMillis";
    private static final String MAX_FILES_TO_ADD_ATTR = "maxFilesToAdd";
    private static final String MAX_FILES_TO_CONSIDER_ATTR = "maxFilesToConsider";
    private static final String CRAWL_ORDER = "crawlOrder";
    private static final String INPUT_DIR_ATTR = "inputDir";
    private static final String INPUT_START_DIR_ATTR = "startDir";
    private static final String MAX_FILE_SIZE_BYTES_ATTR = "maxFileSizeBytes";
    private static final String MIN_FILE_SIZE_BYTES_ATTR = "minFileSizeBytes";
    private static final String INCLUDE_FILE_PAT_ATTR = "includeFilePat";
    private static final String EXCLUDE_FILE_PAT_ATTR = "excludeFilePat";
    private static final String FILE_LIST_ATTR = "fileList";
    private static final String FILE_LIST_ENCODING_ATTR = "fileListEncoding";

    private static final String DEFAULT_INPUT_DIR = "input";
    private static final String DEFAULT_FILE_LIST_ENCODING = "UTF-8";
    private static final long DEFAULT_MAX_CONSEC_WAIT_MILLIS = 300000L;

    // Substrings looked for (case-insensitively) in the crawlOrder attribute. Declared locally
    // so that this builder does not borrow a same-valued constant from an unrelated library.
    private static final String RANDOM_ORDER_TOKEN = "rand";
    private static final String SORTED_ORDER_TOKEN = "sort";

    /**
     * Creates and configures the crawler described by {@code node} and {@code map}.
     *
     * @param node DOM node whose attributes configure the crawler
     * @param map runtime attributes; merged with the node's attributes via
     *     {@code XMLDOMUtil.mapifyAttrs} and also used to look up the number of consumers
     * @param arrayBlockingQueue queue handed to the crawler
     * @return the configured crawler
     * @throws IllegalArgumentException if {@code fileList} is present but resolves to no path,
     *     or if {@code fileListEncoding} does not name a supported charset
     * @throws RuntimeException if the file list cannot be opened
     */
    @Override
    public FileResourceCrawler build(Node node, Map<String, String> map, ArrayBlockingQueue<FileResource> arrayBlockingQueue) {
        Map<String, String> attributes = XMLDOMUtil.mapifyAttrs(node, map);
        int numConsumers = BatchProcessBuilder.getNumConsumers(map);
        Path inputDir = PropsUtil.getPath(attributes.get(INPUT_DIR_ATTR), Paths.get(DEFAULT_INPUT_DIR));

        FileResourceCrawler crawler;
        if (attributes.containsKey(FILE_LIST_ATTR)) {
            crawler = buildListCrawler(attributes, arrayBlockingQueue, numConsumers, inputDir);
        } else {
            crawler = buildDirectoryCrawler(attributes, arrayBlockingQueue, numConsumers, inputDir);
        }

        int maxFilesToConsider = PropsUtil.getInt(attributes.get(MAX_FILES_TO_CONSIDER_ATTR), -1);
        crawler.setMaxFilesToConsider(maxFilesToConsider);
        int maxFilesToAdd = PropsUtil.getInt(attributes.get(MAX_FILES_TO_ADD_ATTR), -1);
        crawler.setMaxFilesToAdd(maxFilesToAdd);

        DocumentSelector selector = buildSelector(attributes);
        if (selector != null) {
            crawler.setDocumentSelector(selector);
        }

        long maxConsecWaitMillis =
                PropsUtil.getLong(attributes.get(MAX_CONSEC_WAIT_MILLIS), DEFAULT_MAX_CONSEC_WAIT_MILLIS);
        crawler.setMaxConsecWaitInMillis(maxConsecWaitMillis);
        return crawler;
    }

    /** Builds an {@link FSListCrawler} from the {@code fileList} and {@code fileListEncoding} attributes. */
    private FileResourceCrawler buildListCrawler(Map<String, String> attributes,
            ArrayBlockingQueue<FileResource> queue, int numConsumers, Path inputDir) {
        if (attributes.get(CRAWL_ORDER) != null) {
            // The list crawler is not given a crawl order, so tell the user the setting is unused.
            System.err.println(CRAWL_ORDER + " attribute is ignored by FSListCrawler");
        }

        Path fileList = PropsUtil.getPath(attributes.get(FILE_LIST_ATTR), null);
        if (fileList == null) {
            // The default passed above is null; fail clearly instead of dereferencing it later.
            throw new IllegalArgumentException(
                    FILE_LIST_ATTR + " attribute is present but does not specify a path");
        }

        String encodingName =
                PropsUtil.getString(attributes.get(FILE_LIST_ENCODING_ATTR), DEFAULT_FILE_LIST_ENCODING);
        Charset encoding;
        try {
            encoding = Charset.forName(encodingName);
        } catch (IllegalArgumentException e) {
            // Charset.forName reports illegal or unsupported names with unchecked subclasses of
            // IllegalArgumentException, which the checked catch clauses below never see.
            throw new IllegalArgumentException("fileList encoding not supported: " + encodingName, e);
        }

        try {
            return new FSListCrawler(queue, numConsumers, inputDir, fileList, encoding);
        } catch (FileNotFoundException e) {
            throw new RuntimeException(
                    "fileList file not found for FSListCrawler: " + fileList.toAbsolutePath(), e);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException("fileList encoding not supported: " + encodingName, e);
        } catch (IOException e) {
            throw new RuntimeException("IOException while trying to open fileList: " + e.getMessage(), e);
        }
    }

    /** Builds an {@link FSDirectoryCrawler}, passing {@code startDir} only when it is configured. */
    private FileResourceCrawler buildDirectoryCrawler(Map<String, String> attributes,
            ArrayBlockingQueue<FileResource> queue, int numConsumers, Path inputDir) {
        FSDirectoryCrawler.CRAWL_ORDER crawlOrder = getCrawlOrder(attributes.get(CRAWL_ORDER));
        Path startDir = PropsUtil.getPath(attributes.get(INPUT_START_DIR_ATTR), null);
        if (startDir == null) {
            return new FSDirectoryCrawler(queue, numConsumers, inputDir, crawlOrder);
        }
        return new FSDirectoryCrawler(queue, numConsumers, inputDir, startDir, crawlOrder);
    }

    /**
     * Maps the {@code crawlOrder} attribute value to a crawl order.
     *
     * @param str raw attribute value; may be {@code null}
     * @return {@code RANDOM} if the lower-cased value contains "rand", otherwise {@code SORTED}
     *     if it contains "sort", otherwise {@code OS_ORDER} (including for null, blank and "os")
     */
    private FSDirectoryCrawler.CRAWL_ORDER getCrawlOrder(String str) {
        if (str == null || str.trim().isEmpty()) {
            return FSDirectoryCrawler.CRAWL_ORDER.OS_ORDER;
        }
        String lowered = str.toLowerCase(Locale.ROOT);
        if (lowered.contains(RANDOM_ORDER_TOKEN)) {
            return FSDirectoryCrawler.CRAWL_ORDER.RANDOM;
        }
        if (lowered.contains(SORTED_ORDER_TOKEN)) {
            return FSDirectoryCrawler.CRAWL_ORDER.SORTED;
        }
        // Anything unrecognised, including the explicit value "os", uses OS order.
        return FSDirectoryCrawler.CRAWL_ORDER.OS_ORDER;
    }

    /**
     * Builds the document selector from the include/exclude pattern and file size attributes.
     *
     * @param map merged crawler attributes
     * @return a new {@link FSDocumentSelector}; a missing or empty pattern attribute is passed
     *     as {@code null} and a missing size limit as {@code -1}
     */
    private DocumentSelector buildSelector(Map<String, String> map) {
        Pattern includePattern = compileOrNull(map.get(INCLUDE_FILE_PAT_ATTR));
        Pattern excludePattern = compileOrNull(map.get(EXCLUDE_FILE_PAT_ATTR));
        long maxFileSizeBytes = PropsUtil.getLong(map.get(MAX_FILE_SIZE_BYTES_ATTR), -1L);
        long minFileSizeBytes = PropsUtil.getLong(map.get(MIN_FILE_SIZE_BYTES_ATTR), -1L);
        return new FSDocumentSelector(includePattern, excludePattern, minFileSizeBytes, maxFileSizeBytes);
    }

    /** Compiles {@code regex}, or returns {@code null} when it is {@code null} or empty. */
    private static Pattern compileOrNull(String regex) {
        if (regex == null || regex.isEmpty()) {
            return null;
        }
        return Pattern.compile(regex);
    }
}
