/*
 * Decompiled with CFR 0.152.
 */
package de.pangaea.metadataportal.harvester;

import de.pangaea.metadataportal.Package;
import de.pangaea.metadataportal.config.HarvesterConfig;
import de.pangaea.metadataportal.harvester.Harvester;
import de.pangaea.metadataportal.harvester.OAIMetadataDocument;
import de.pangaea.metadataportal.harvester.RetryAfterIOException;
import de.pangaea.metadataportal.processor.ElasticsearchConnection;
import de.pangaea.metadataportal.processor.MetadataDocument;
import de.pangaea.metadataportal.utils.BooleanParser;
import de.pangaea.metadataportal.utils.ExtendedDigester;
import de.pangaea.metadataportal.utils.HttpClientUtils;
import de.pangaea.metadataportal.utils.HugeStringHashBuilder;
import java.io.IOException;
import java.io.InputStream;
import java.net.CookieManager;
import java.net.CookiePolicy;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.time.DateTimeException;
import java.time.Duration;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;
import java.util.Collections;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Supplier;
import org.apache.commons.digester.AbstractObjectCreationFactory;
import org.apache.commons.digester.ObjectCreationFactory;
import org.xml.sax.Attributes;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

public abstract class OAIHarvesterBase
extends Harvester {
    public static final String OAI_NS = "http://www.openarchives.org/OAI/2.0/";
    public static final String OAI_STATICREPOSITORY_NS = "http://www.openarchives.org/OAI/2.0/static-repository";
    public static final int DEFAULT_RETRY_TIME = 60;
    public static final int DEFAULT_RETRY_COUNT = 5;
    public static final int DEFAULT_TIMEOUT = 180;
    public static final String USER_AGENT = "Java/" + Runtime.version() + " (" + Package.getProductName() + '/' + Package.getVersion() + "; OAI downloader)";
    protected final String metadataPrefix;
    protected final String identifierPrefix;
    protected final Set<String> sets;
    protected final int retryCount;
    protected final int retryTime;
    protected final Duration timeout;
    protected final String authorizationHeader;
    protected final boolean ignoreDatestamps;
    protected final boolean deleteMissingDocuments;
    private HugeStringHashBuilder validIdentifiersBuilder = null;
    protected final HttpClient httpClient;
    protected boolean filterIncomingSets = true;

    public OAIHarvesterBase(HarvesterConfig iconfig) {
        super(iconfig);
        String[] sets;
        String s = iconfig.properties.getProperty("setSpec");
        this.sets = s != null ? ((sets = s.split("[\\,\\;\\s]+")).length == 0 ? null : Set.of(sets)) : null;
        this.retryCount = Integer.parseInt(iconfig.properties.getProperty("retryCount", Integer.toString(5)));
        this.retryTime = Integer.parseInt(iconfig.properties.getProperty("retryAfterSeconds", Integer.toString(60)));
        this.timeout = Duration.ofSeconds(Integer.parseInt(iconfig.properties.getProperty("timeoutAfterSeconds", Integer.toString(180))));
        this.authorizationHeader = iconfig.properties.getProperty("authorizationHeader");
        this.metadataPrefix = iconfig.properties.getProperty("metadataPrefix");
        if (this.metadataPrefix == null) {
            throw new NullPointerException("No metadataPrefix for the OAI repository was given!");
        }
        this.identifierPrefix = iconfig.properties.getProperty("identifierPrefix", "");
        this.ignoreDatestamps = BooleanParser.parseBoolean(iconfig.properties.getProperty("ignoreDatestamps", "false"));
        this.deleteMissingDocuments = BooleanParser.parseBoolean(iconfig.properties.getProperty("deleteMissingDocuments", "true"));
        this.httpClient = HttpClient.newBuilder().followRedirects(HttpClient.Redirect.NORMAL).connectTimeout(this.timeout).cookieHandler(new CookieManager(null, CookiePolicy.ACCEPT_ORIGINAL_SERVER)).build();
    }

    @Override
    public void open(ElasticsearchConnection es, String targetIndex) throws Exception {
        super.open(es, targetIndex);
        this.recreateDigester();
    }

    @Override
    public void addDocument(MetadataDocument mdoc) throws Exception {
        OAIMetadataDocument omdoc;
        if (this.filterIncomingSets && this.sets != null && mdoc instanceof OAIMetadataDocument && Collections.disjoint((omdoc = (OAIMetadataDocument)mdoc).getSets(), this.sets)) {
            omdoc.setDeleted(true);
        }
        if (this.validIdentifiersBuilder != null && !mdoc.isDeleted()) {
            this.validIdentifiersBuilder.add(mdoc.getIdentifier());
        }
        super.addDocument(mdoc);
    }

    @Override
    public MetadataDocument createMetadataDocumentInstance() {
        return new OAIMetadataDocument(this.iconfig, this.identifierPrefix, this.ignoreDatestamps);
    }

    protected ObjectCreationFactory getMetadataDocumentFactory() {
        return new AbstractObjectCreationFactory(){

            public Object createObject(Attributes attributes) {
                return OAIHarvesterBase.this.createMetadataDocumentInstance();
            }
        };
    }

    protected abstract void recreateDigester();

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     * Enabled aggressive block sorting
     * Enabled unnecessary exception pruning
     * Enabled aggressive exception aggregation
     */
    protected boolean doParse(Supplier<ExtendedDigester> digSupplier, String url, AtomicReference<Instant> checkModifiedDate) throws Exception {
        URI u = new URI(url);
        int retry = 0;
        while (retry <= this.retryCount) {
            try {
                ExtendedDigester dig = digSupplier.get();
                dig.clear();
                dig.resetRoot();
                dig.push(this);
                InputSource is = this.getInputSource(u, checkModifiedDate);
                try {
                    if (checkModifiedDate != null && is == null) {
                        boolean bl = false;
                        return bl;
                    }
                    dig.parse(is);
                    return true;
                }
                finally {
                    if (is != null && is.getByteStream() != null) {
                        is.getByteStream().close();
                    }
                }
            }
            catch (SAXException saxe) {
                if (saxe.getException() == null) throw saxe;
                throw saxe.getException();
            }
            catch (RetryAfterIOException ioe) {
                int after = this.retryTime;
                if (retry >= this.retryCount) {
                    throw ioe.getCause();
                }
                this.log.warn((Object)ioe.getMessage());
                after = ioe.getRetryAfter();
                this.log.info((Object)("Retrying after " + after + " seconds (" + (this.retryCount - retry) + " retries left)..."));
                try {
                    Thread.sleep(1000L * (long)after);
                }
                catch (InterruptedException interruptedException) {
                    // empty catch block
                }
                this.log.debug((Object)"Recreating digester instances to recover from incomplete parsers...");
                this.recreateDigester();
                ++retry;
            }
        }
        throw new IOException("Unable to properly connect OAI server.");
    }

    protected EntityResolver getEntityResolver(final EntityResolver parent) {
        return new EntityResolver(){

            @Override
            public InputSource resolveEntity(String publicId, String systemId) throws IOException, SAXException {
                try {
                    URI uri = new URI(systemId);
                    String proto = uri.getScheme().toLowerCase(Locale.ROOT);
                    if ("http".equals(proto) || "https".equals(proto)) {
                        return OAIHarvesterBase.this.getInputSource(uri, null);
                    }
                    return parent == null ? null : parent.resolveEntity(publicId, systemId);
                }
                catch (URISyntaxException e) {
                    return parent == null ? null : parent.resolveEntity(publicId, systemId);
                }
            }
        };
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    protected InputSource getInputSource(URI url, AtomicReference<Instant> checkModifiedDate) throws IOException {
        HttpResponse<InputStream> resp;
        String proto = url.getScheme().toLowerCase(Locale.ROOT);
        if (!"http".equals(proto) && !"https".equals(proto)) {
            throw new IllegalArgumentException("OAI only allows HTTP(S) as network protocol!");
        }
        HttpRequest.Builder reqBuilder = HttpRequest.newBuilder(url).GET().timeout(this.timeout).setHeader("User-Agent", USER_AGENT).setHeader("Accept-Charset", StandardCharsets.UTF_8.name() + ", *;q=0.1").setHeader("Accept", "text/xml, application/xml, *;q=0.1");
        HttpClientUtils.sendCompressionHeaders(reqBuilder);
        if (this.authorizationHeader != null) {
            reqBuilder.header("Authorization", this.authorizationHeader);
        }
        if (checkModifiedDate != null && checkModifiedDate.get() != null) {
            reqBuilder.setHeader("If-Modified-Since", DateTimeFormatter.RFC_1123_DATE_TIME.format(checkModifiedDate.get().atOffset(ZoneOffset.UTC)));
        }
        this.log.debug((Object)"Opening connection...");
        try {
            resp = HttpClientUtils.sendHttpRequestWithRetry(this.httpClient, reqBuilder.build(), HttpResponse.BodyHandlers.ofInputStream());
        }
        catch (IOException ioe) {
            throw new RetryAfterIOException(this.retryTime, ioe);
        }
        boolean success = false;
        int statusCode = resp.statusCode();
        switch (statusCode) {
            case 503: {
                IOException ioe1 = new IOException("OAI server returned '503 Service Unavailable'");
                Optional<Integer> retryAfter = resp.headers().firstValue("Retry-After").map(Integer::parseInt);
                if (retryAfter.isPresent()) {
                    throw new RetryAfterIOException(retryAfter.get(), "OAI server returned '503 Service Unavailable', repeating after " + String.valueOf(retryAfter.get()) + "s.", ioe1);
                }
                throw new RetryAfterIOException(this.retryTime, "OAI server returned error code, repeating after " + this.retryTime + "s: " + statusCode, ioe1);
            }
            case 304: {
                if (checkModifiedDate != null) {
                    this.log.debug((Object)("File not modified since " + String.valueOf(checkModifiedDate.get())));
                    InputSource inputSource = null;
                    return inputSource;
                }
                throw new IOException("OAI service returned 'not modified', although we did no 'If-Modified-Since' request.");
            }
            case 200: {
                break;
            }
            default: {
                IOException ioe2 = new IOException("OAI service returned invalid status code: " + statusCode);
                if (statusCode >= 500) {
                    throw new RetryAfterIOException(this.retryTime, "OAI Server returned error code, repeating after " + this.retryTime + "s: " + statusCode, ioe2);
                }
                throw ioe2;
            }
        }
        if (checkModifiedDate != null) {
            Instant d = resp.headers().firstValue("Last-Modified").map(DateTimeFormatter.RFC_1123_DATE_TIME::parse).map(Instant::from).orElse(null);
            checkModifiedDate.set(d);
        }
        String charset = resp.headers().firstValue("Content-Type").map(contentType -> {
            int charsetStart = (contentType = contentType.toLowerCase(Locale.ROOT)).indexOf("charset=");
            if (charsetStart >= 0) {
                int charsetEnd = contentType.indexOf(";", charsetStart);
                if (charsetEnd == -1) {
                    charsetEnd = contentType.length();
                }
                return contentType.substring(charsetStart += "charset=".length(), charsetEnd).trim();
            }
            return null;
        }).orElse(null);
        this.log.debug((Object)("Charset from Content-Type: '" + charset + "'"));
        InputSource src = new InputSource(HttpClientUtils.getDecompressingInputStream(resp));
        src.setSystemId(url.toString());
        src.setEncoding(charset);
        success = true;
        InputSource inputSource = src;
        return inputSource;
        finally {
            if (!success) {
                try {
                    resp.body().close();
                }
                catch (IOException iOException) {}
            }
        }
    }

    protected void reset() {
    }

    protected void enableMissingDocumentDelete() {
        if (this.validIdentifiersBuilder == null && this.deleteMissingDocuments) {
            this.log.info((Object)"Tracking of seen document identifiers enabled.");
            this.validIdentifiersBuilder = new HugeStringHashBuilder();
        }
    }

    protected void cancelMissingDocumentDelete() {
        this.log.info((Object)"Tracking of seen document identifiers cancelled, no deletions will happen.");
        this.validIdentifiersBuilder = null;
    }

    @Override
    public void close(boolean cleanShutdown) throws Exception {
        if (cleanShutdown && this.validIdentifiersBuilder != null) {
            this.setValidIdentifiers(this.validIdentifiersBuilder.build());
        }
        this.reset();
        super.close(cleanShutdown);
    }

    @Override
    protected void enumerateValidHarvesterPropertyNames(Set<String> props) {
        super.enumerateValidHarvesterPropertyNames(props);
        props.addAll(Arrays.asList("setSpec", "retryCount", "retryAfterSeconds", "timeoutAfterSeconds", "metadataPrefix", "identifierPrefix", "ignoreDatestamps", "deleteMissingDocuments", "authorizationHeader"));
    }
}

