/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.protocol.ftp;

import crawlercommons.robots.BaseRobotRules;
import java.lang.invoke.MethodHandles;
import java.net.URL;
import java.util.List;
import java.util.Locale;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.Protocol;
import org.apache.nutch.protocol.ProtocolOutput;
import org.apache.nutch.protocol.ProtocolStatus;
import org.apache.nutch.protocol.RobotRulesParser;
import org.apache.nutch.protocol.ftp.Ftp;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Resolves robots.txt rules for FTP URLs.
 *
 * <p>Rules are looked up in, and stored to, the inherited {@code CACHE} under a
 * {@code "<protocol>:<host>"} key, so that robots.txt is not re-fetched on every
 * call for the same host.
 */
public class FtpRobotRulesParser extends RobotRulesParser {
    /** Content type passed to {@code parseRules} for a fetched robots.txt. */
    private static final String CONTENT_TYPE = "text/plain";
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /** Package-private no-argument constructor. */
    FtpRobotRulesParser() {
    }

    /**
     * Creates a parser and hands the configuration to the superclass.
     *
     * @param conf configuration forwarded to {@link RobotRulesParser}
     */
    public FtpRobotRulesParser(Configuration conf) {
        super(conf);
    }

    /**
     * Returns the robots rules that apply to the host of {@code url}.
     *
     * <p>A cached entry for the {@code protocol:host} pair is returned directly.
     * Otherwise, allowlisted hosts get {@code EMPTY_RULES} without any fetch; for
     * all other hosts {@code /robots.txt} is fetched through {@code ftp} and
     * parsed when the fetch status is {@link ProtocolStatus#SUCCESS}, and
     * {@code EMPTY_RULES} is used for any other status.
     *
     * <p>If fetching or parsing throws, {@code EMPTY_RULES} is returned but not
     * cached, so the next call for the same host attempts the fetch again.
     *
     * @param ftp protocol used to fetch robots.txt; it is cast to {@link Ftp},
     *        and a failing cast is handled like any other fetch failure
     * @param url URL whose protocol and host select the rules
     * @param robotsTxtContent if not {@code null}, the fetched robots.txt content
     *        is appended to this list
     * @return the cached, parsed, or empty rules; never fetched twice for a host
     *         once a result has been cached
     */
    public BaseRobotRules getRobotRulesSet(Protocol ftp, URL url, List<Content> robotsTxtContent) {
        // Locale.ROOT keeps the cache key stable regardless of the JVM default
        // locale (e.g. Turkish lower-casing of 'I' yields a dotless 'ı').
        String protocol = url.getProtocol().toLowerCase(Locale.ROOT);
        String host = url.getHost().toLowerCase(Locale.ROOT);
        String cacheKey = protocol + ":" + host;

        // The allowlist check is only evaluated here when trace logging is on.
        if (LOG.isTraceEnabled() && this.isAllowListed(url)) {
            LOG.trace("Ignoring robots.txt (host is allowlisted) for URL: {}", url);
        }

        BaseRobotRules robotRules = (BaseRobotRules)CACHE.get(cacheKey);
        if (robotRules != null) {
            return robotRules;
        }
        LOG.trace("cache miss {}", url);

        boolean cacheRule = true;
        if (this.isAllowListed(url)) {
            // Allowlisted host: no need to fetch robots.txt at all.
            robotRules = EMPTY_RULES;
            LOG.info("Allowlisted host found for: {}", url);
            LOG.info("Ignoring robots.txt for all URLs from allowlisted host: {}", host);
        } else {
            try {
                Text robotsUrl = new Text(new URL(url, "/robots.txt").toString());
                ProtocolOutput output = ((Ftp)ftp).getProtocolOutput(robotsUrl, new CrawlDatum());
                ProtocolStatus status = output.getStatus();
                if (robotsTxtContent != null) {
                    robotsTxtContent.add(output.getContent());
                }
                if (status.getCode() == ProtocolStatus.SUCCESS) {
                    robotRules = this.parseRules(url.toString(), output.getContent().getContent(), CONTENT_TYPE, this.agentNames);
                } else {
                    robotRules = EMPTY_RULES;
                }
            }
            catch (Throwable t) {
                // Deliberate best-effort: any failure falls back to empty rules,
                // and skipping the cache lets a later call retry the fetch.
                LOG.info("Couldn't get robots.txt for {}: {}", url, t.toString());
                cacheRule = false;
                robotRules = EMPTY_RULES;
            }
        }

        if (cacheRule) {
            CACHE.put(cacheKey, robotRules);
        }
        return robotRules;
    }
}

