package weka.filters.unsupervised.attribute;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.ObjectInputStream;
import java.util.Enumeration;
import java.util.Vector;
import weka.clusterers.AbstractClusterer;
import weka.clusterers.Clusterer;
import weka.clusterers.SimpleKMeans;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.DenseInstance;
import weka.core.FastVector;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.TestInstances;
import weka.core.Utils;
import weka.core.WekaException;
import weka.filters.Filter;
import weka.filters.UnsupervisedFilter;

/* loaded from: input_file:weka/filters/unsupervised/attribute/AddCluster.class */
/**
 * Unsupervised filter that appends a nominal attribute named "cluster" to every
 * instance, holding the cluster assigned by a clusterer.
 *
 * <p>The clusterer is either built from the first batch of data (a copy of the
 * configured clusterer is trained) or deserialized from a model file. Attributes
 * listed in the ignore range, and the class attribute if one is set, are removed
 * before the data is shown to the clusterer; they are still present in the output.
 */
public class AddCluster extends Filter implements UnsupervisedFilter, OptionHandler {
    /** Serialization version identifier. */
    static final long serialVersionUID = 7414280611943807337L;

    /** Clusterer template; a copy of it is built on the first batch unless a model file is used. */
    protected Clusterer m_Clusterer = new SimpleKMeans();

    /**
     * Serialized clusterer model. When this points to a directory (the default is
     * the current working directory) no model file is in use.
     */
    protected File m_SerializedClustererFile = new File(System.getProperty("user.dir"));

    /** Clusterer that actually assigns clusters: the built copy or the deserialized model. */
    protected Clusterer m_ActualClusterer = null;

    /** Attributes the clusterer should not see, or {@code null} if none were specified. */
    protected Range m_IgnoreAttributesRange = null;

    /**
     * Filter that strips ignored attributes and the class attribute before clustering.
     * Reset to {@code null} by {@link #setInputFormat(Instances)} and recreated by
     * {@link #removeIgnored(Instances)} whenever something has to be removed.
     */
    protected Filter m_removeAttributes = new Remove();

    /**
     * Returns the capabilities for the given data, evaluated on a header-only copy
     * whose class index has been unset.
     *
     * @param instances the data to derive the capabilities for
     * @return the capabilities reported by the superclass for the class-less header
     */
    @Override
    public Capabilities getCapabilities(Instances instances) {
        Instances header = new Instances(instances, 0);
        header.setClassIndex(-1);
        return super.getCapabilities(header);
    }

    /**
     * Returns the capabilities of the configured clusterer, with the minimum number
     * of required instances lowered to zero.
     *
     * @return the clusterer's capabilities
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities capabilities = m_Clusterer.getCapabilities();
        capabilities.setMinimumNumberInstances(0);
        return capabilities;
    }

    /**
     * Checks the input format against the capabilities, after the ignored
     * attributes and the class attribute have been removed from it.
     *
     * @param instances the input format to test
     * @throws Exception if the reduced data fails the capabilities test
     */
    @Override
    protected void testInputFormat(Instances instances) throws Exception {
        getCapabilities(instances).testWithFail(removeIgnored(instances));
    }

    /**
     * Sets the format of the input instances and discards any previously created
     * attribute-removal filter.
     *
     * @param instances the header describing the incoming data
     * @return always {@code false}: the output format is only known once the first
     *         batch has been finished
     * @throws Exception if the superclass rejects the format
     */
    @Override
    public boolean setInputFormat(Instances instances) throws Exception {
        super.setInputFormat(instances);
        m_removeAttributes = null;
        return false;
    }

    /**
     * Removes the ignored attributes and the class attribute (if set) from the data.
     * When there is nothing to remove, the data is returned unchanged and no removal
     * filter is created.
     *
     * @param instances the data to reduce
     * @return the reduced data, or {@code instances} itself if nothing is removed
     * @throws Exception if configuring or applying the removal filter fails
     */
    protected Instances removeIgnored(Instances instances) throws Exception {
        int classIndex = instances.classIndex();
        if (m_IgnoreAttributesRange == null && classIndex < 0) {
            return instances;
        }

        Remove remove = new Remove();
        m_removeAttributes = remove;

        String indices = m_IgnoreAttributesRange != null ? m_IgnoreAttributesRange.getRanges() : "";
        if (classIndex >= 0) {
            // The class attribute is appended as classIndex + 1, i.e. shifted by one
            // relative to the zero-based class index.
            String classPosition = String.valueOf(classIndex + 1);
            indices = indices.length() > 0 ? indices + "," + classPosition : classPosition;
        }

        remove.setAttributeIndices(indices);
        remove.setInvertSelection(false);
        remove.setInputFormat(instances);
        return Filter.useFilter(instances, remove);
    }

    /**
     * Signals the end of a batch. On the first batch the clusterer is built (or
     * loaded from the model file) and the output format is defined as the input
     * format plus a trailing nominal "cluster" attribute with the labels
     * "cluster1" ... "clusterN". All instances held by the input format are then
     * converted and queued for output.
     *
     * @return {@code true} if there are instances pending for output
     * @throws IllegalStateException if no input format has been defined
     * @throws Exception if building/loading the clusterer or converting an instance fails
     */
    @Override
    public boolean batchFinished() throws Exception {
        Instances inputFormat = getInputFormat();
        if (inputFormat == null) {
            throw new IllegalStateException("No input instance format defined");
        }

        if (!isFirstBatchDone()) {
            Instances clusterData = removeIgnored(inputFormat);
            File modelFile = getSerializedClustererFile();
            if (modelFile.isDirectory()) {
                // A directory means no model file was configured: train a fresh copy.
                m_ActualClusterer = AbstractClusterer.makeCopy(m_Clusterer);
                m_ActualClusterer.buildClusterer(clusterData);
            } else {
                loadSerializedClusterer(modelFile, clusterData);
            }

            Instances outputFormat = new Instances(inputFormat, 0);
            int numClusters = m_ActualClusterer.numberOfClusters();
            FastVector clusterLabels = new FastVector(numClusters);
            for (int i = 0; i < numClusters; i++) {
                clusterLabels.addElement("cluster" + (i + 1));
            }
            outputFormat.insertAttributeAt(new Attribute("cluster", clusterLabels), outputFormat.numAttributes());
            setOutputFormat(outputFormat);
        }

        for (int i = 0; i < inputFormat.numInstances(); i++) {
            convertInstance(inputFormat.instance(i));
        }
        flushInput();
        m_NewBatch = true;
        m_FirstBatchDone = true;
        return numPendingOutput() != 0;
    }

    /**
     * Reads the clusterer, and the optional training header stored after it, from a
     * serialized model file and stores the clusterer in {@link #m_ActualClusterer}.
     * The file is closed even when deserialization fails.
     *
     * <p>NOTE(review): this uses Java native deserialization; the model file must come
     * from a trusted source.
     *
     * @param modelFile the serialized model
     * @param expectedHeader the (reduced) data the clusterer will be applied to
     * @throws WekaException if a stored training header does not match {@code expectedHeader}
     * @throws Exception if the file cannot be read or does not contain a clusterer
     */
    private void loadSerializedClusterer(File modelFile, Instances expectedHeader) throws Exception {
        Instances trainingHeader = null;
        FileInputStream fileStream = new FileInputStream(modelFile);
        try {
            ObjectInputStream objectStream = new ObjectInputStream(fileStream);
            m_ActualClusterer = (Clusterer) objectStream.readObject();
            try {
                trainingHeader = (Instances) objectStream.readObject();
            } catch (Exception ignored) {
                // The training header is optional; a model stored without one is
                // accepted and simply skips the header compatibility check below.
            }
        } finally {
            // Closing the underlying stream releases the file handle on every path.
            fileStream.close();
        }

        if (trainingHeader != null && !trainingHeader.equalHeaders(expectedHeader)) {
            throw new WekaException("Training header of clusterer and filter dataset don't match:\n" + trainingHeader.equalHeadersMsg(expectedHeader));
        }
    }

    /**
     * Feeds one instance to the filter. Once the output format is known the instance
     * is converted immediately; before that it is buffered until
     * {@link #batchFinished()} is called.
     *
     * @param instance the instance to process
     * @return {@code true} if the instance was converted and queued for output,
     *         {@code false} if it was only buffered
     * @throws IllegalStateException if no input format has been defined
     * @throws Exception if converting the instance fails
     */
    @Override
    public boolean input(Instance instance) throws Exception {
        if (getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (m_NewBatch) {
            resetQueue();
            m_NewBatch = false;
        }
        if (outputFormatPeek() == null) {
            bufferInput(instance);
            return false;
        }
        convertInstance(instance);
        return true;
    }

    /**
     * Copies the instance, appends its cluster assignment as the last value and
     * pushes the result onto the output queue. If the clusterer cannot assign a
     * cluster (any exception), the new value is set to missing instead.
     *
     * @param instance the instance to convert
     * @throws Exception if removing the ignored attributes or copying values fails
     */
    protected void convertInstance(Instance instance) throws Exception {
        int numAttributes = instance.numAttributes();
        double[] values = new double[numAttributes + 1];
        for (int i = 0; i < numAttributes; i++) {
            values[i] = instance.value(i);
        }

        // The clusterer must see the same reduced view of the data it was built on.
        Instance clusterInput = instance;
        if (m_removeAttributes != null) {
            m_removeAttributes.input(instance);
            clusterInput = m_removeAttributes.output();
        }

        try {
            values[numAttributes] = m_ActualClusterer.clusterInstance(clusterInput);
        } catch (Exception e) {
            // Deliberate best effort: an instance that cannot be clustered gets a missing value.
            values[numAttributes] = Utils.missingValue();
        }

        Instance converted;
        if (instance instanceof SparseInstance) {
            converted = new SparseInstance(instance.weight(), values);
        } else {
            converted = new DenseInstance(instance.weight(), values);
        }
        converted.setDataset(instance.dataset());
        copyValues(converted, false, instance.dataset(), getOutputFormat());
        converted.setDataset(getOutputFormat());
        push(converted);
    }

    /**
     * Lists the command-line options understood by this filter: {@code -W},
     * {@code -serialized} and {@code -I}.
     *
     * @return an enumeration of {@link Option} objects
     */
    @Override
    public Enumeration listOptions() {
        Vector<Option> options = new Vector<Option>();
        options.addElement(new Option("\tFull class name of clusterer to use, followed\n\tby scheme options. eg:\n\t\t\"weka.clusterers.SimpleKMeans -N 3\"\n\t(default: weka.clusterers.SimpleKMeans)", "W", 1, "-W <clusterer specification>"));
        options.addElement(new Option("\tInstead of building a clusterer on the data, one can also provide\n\ta serialized model and use that for adding the clusters.", "serialized", 1, "-serialized <file>"));
        options.addElement(new Option("\tThe range of attributes the clusterer should ignore.\n", "I", 1, "-I <att1,att2-att4,...>"));
        return options.elements();
    }

    /**
     * Parses the given options. If {@code -serialized} is supplied, the model file is
     * validated and used and {@code -W} is not read; otherwise the clusterer is
     * created from {@code -W} (default: SimpleKMeans). {@code -I} sets the ignored
     * attribute range.
     *
     * @param strArr the option array
     * @throws FileNotFoundException if the serialized file does not exist or is a directory
     * @throws Exception if the clusterer specification is invalid or options remain unparsed
     */
    @Override
    public void setOptions(String[] strArr) throws Exception {
        String serializedPath = Utils.getOption("serialized", strArr);
        if (serializedPath.length() != 0) {
            File modelFile = new File(serializedPath);
            if (!modelFile.exists()) {
                throw new FileNotFoundException("File '" + modelFile.getAbsolutePath() + "' not found!");
            }
            if (modelFile.isDirectory()) {
                throw new FileNotFoundException("'" + modelFile.getAbsolutePath() + "' points to a directory not a file!");
            }
            setSerializedClustererFile(modelFile);
        } else {
            setSerializedClustererFile(null);

            String clustererSpec = Utils.getOption('W', strArr);
            if (clustererSpec.length() == 0) {
                clustererSpec = SimpleKMeans.class.getName();
            }
            String[] specParts = Utils.splitOptions(clustererSpec);
            if (specParts.length == 0) {
                throw new Exception("Invalid clusterer specification string");
            }
            // First token is the class name; the remainder are the clusterer's own options.
            String clustererClassName = specParts[0];
            specParts[0] = "";
            setClusterer(AbstractClusterer.forName(clustererClassName, specParts));
        }

        setIgnoredAttributeIndices(Utils.getOption('I', strArr));
        Utils.checkForRemainingOptions(strArr);
    }

    /**
     * Returns the current settings as an option array: either {@code -W <spec>} or
     * {@code -serialized <file>}, followed by {@code -I <range>} when a range is set.
     *
     * @return the option array
     */
    @Override
    public String[] getOptions() {
        Vector<String> options = new Vector<String>();

        File modelFile = getSerializedClustererFile();
        if (modelFile == null || modelFile.isDirectory()) {
            options.add("-W");
            options.add(getClustererSpec());
        } else {
            options.add("-serialized");
            options.add(modelFile.getAbsolutePath());
        }

        String ignoredIndices = getIgnoredAttributeIndices();
        if (!ignoredIndices.equals("")) {
            options.add("-I");
            options.add(ignoredIndices);
        }
        return options.toArray(new String[options.size()]);
    }

    /**
     * Returns a description of this filter.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "A filter that adds a new nominal attribute representing the cluster assigned to each instance by the specified clustering algorithm.\nEither the clustering algorithm gets built with the first batch of data or one specifies are serialized clusterer model file to use instead.";
    }

    /**
     * Returns the tip text for the clusterer property.
     *
     * @return the tip text
     */
    public String clustererTipText() {
        return "The clusterer to assign clusters with.";
    }

    /**
     * Sets the clusterer template used when no serialized model is configured.
     *
     * @param clusterer the clusterer to use
     */
    public void setClusterer(Clusterer clusterer) {
        m_Clusterer = clusterer;
    }

    /**
     * Returns the configured clusterer template.
     *
     * @return the clusterer
     */
    public Clusterer getClusterer() {
        return m_Clusterer;
    }

    /**
     * Builds the clusterer specification: the class name, followed by a separator and
     * the joined options when the clusterer is an {@link OptionHandler}.
     *
     * @return the specification string
     */
    protected String getClustererSpec() {
        Clusterer clusterer = getClusterer();
        String className = clusterer.getClass().getName();
        if (clusterer instanceof OptionHandler) {
            // NOTE(review): the separator constant is borrowed from TestInstances;
            // presumably a single space - confirm.
            return className + TestInstances.DEFAULT_SEPARATORS + Utils.joinOptions(((OptionHandler) clusterer).getOptions());
        }
        return className;
    }

    /**
     * Returns the tip text for the ignored attribute indices property.
     *
     * @return the tip text
     */
    public String ignoredAttributeIndicesTipText() {
        return "The range of attributes to be ignored by the clusterer. eg: first-3,5,9-last";
    }

    /**
     * Returns the range of attributes hidden from the clusterer.
     *
     * @return the range string, or the empty string if no range is set
     */
    public String getIgnoredAttributeIndices() {
        return m_IgnoreAttributesRange == null ? "" : m_IgnoreAttributesRange.getRanges();
    }

    /**
     * Sets the range of attributes hidden from the clusterer.
     *
     * @param str the range string; {@code null} or empty clears the range
     */
    public void setIgnoredAttributeIndices(String str) {
        if (str == null || str.length() == 0) {
            m_IgnoreAttributesRange = null;
            return;
        }
        m_IgnoreAttributesRange = new Range();
        m_IgnoreAttributesRange.setRanges(str);
    }

    /**
     * Returns the serialized clusterer model file.
     *
     * @return the file; a directory indicates that no model file is in use
     */
    public File getSerializedClustererFile() {
        return m_SerializedClustererFile;
    }

    /**
     * Sets the serialized clusterer model file. A {@code null} or non-existent file
     * is replaced by the current working directory, which disables model loading.
     *
     * @param file the model file, may be {@code null}
     */
    public void setSerializedClustererFile(File file) {
        if (file == null || !file.exists()) {
            file = new File(System.getProperty("user.dir"));
        }
        m_SerializedClustererFile = file;
    }

    /**
     * Returns the tip text for the serialized clusterer file property.
     *
     * @return the tip text
     */
    public String serializedClustererFileTipText() {
        return "A file containing the serialized model of a built clusterer.";
    }

    /**
     * Returns the revision string.
     *
     * @return the revision extracted from the embedded revision keyword
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 8034 $");
    }

    /**
     * Runs the filter from the command line.
     *
     * @param strArr the command-line arguments
     */
    public static void main(String[] strArr) {
        runFilter(new AddCluster(), strArr);
    }
}
