package weka.core.converters;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.LinkedList;
import java.util.List;
import java.util.Vector;
import weka.core.Attribute;
import weka.core.CommandlineRunnable;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.OptionHandler;
import weka.core.RevisionUtils;
import weka.core.SerializedObject;
import weka.core.Utils;
import weka.gui.beans.xml.XMLBeans;

/* loaded from: input_file:weka/core/converters/TextDirectoryLoader.class */
/**
 * Loader that turns a directory tree of text files into a dataset.
 *
 * <p>Every immediate subdirectory of the configured directory becomes one
 * label of the nominal class attribute {@code @@class@@}; the content of each
 * file inside such a subdirectory is stored in the string attribute
 * {@code text}. When {@link #setOutputFilename(boolean)} is enabled, a second
 * string attribute {@code filename} is inserted before the class attribute.
 *
 * <p>Data can be retrieved in one go via {@link #getDataSet()} or one instance
 * at a time via {@link #getNextInstance(Instances)}.
 */
public class TextDirectoryLoader extends AbstractLoader implements BatchConverter, IncrementalConverter, OptionHandler, CommandlineRunnable {
    private static final long serialVersionUID = 2592118773712247647L;

    /** Name (without the leading dash) of the option that selects the directory. */
    private static final String OPTION_DIR = "dir";

    /** Size of the character buffer used when reading a text file. */
    private static final int READ_BUFFER_SIZE = 8192;

    /** Per class label (same order as the class attribute), the file names not yet returned incrementally. */
    protected List<LinkedList<String>> m_filesByClass;

    /** Cached header; built lazily by {@link #getStructure()}. */
    protected Instances m_structure = null;

    /** The directory whose subdirectories are used as class labels. */
    protected File m_sourceFile = new File(System.getProperty("user.dir"));

    /** Whether to print progress information to stderr. */
    protected boolean m_Debug = false;

    /** Whether to add the {@code filename} attribute. */
    protected boolean m_OutputFilename = false;

    /** Character set used for reading files; empty means the platform default. */
    protected String m_charSet = "";

    /** Index of the class directory the next incremental read starts from. */
    protected int m_lastClassDir = 0;

    /**
     * Creates a loader working on the current working directory.
     */
    public TextDirectoryLoader() {
        // NOTE(review): 0 is presumably the "no retrieval mode chosen yet" constant of Loader - confirm.
        setRetrieval(0);
    }

    /**
     * Returns a description of this loader for display to the user.
     *
     * @return the description text
     */
    public String globalInfo() {
        return "Loads all text files in a directory and uses the subdirectory names as class labels. The content of the text files will be stored in a String attribute, the filename can be stored as well.";
    }

    /**
     * Lists the command-line options understood by {@link #setOptions(String[])}.
     *
     * @return an enumeration of the available options
     */
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> options = new Vector<>();
        options.add(new Option("\tEnables debug output.\n\t(default: off)", "D", 0, "-D"));
        options.add(new Option("\tStores the filename in an additional attribute.\n\t(default: off)", "F", 0, "-F"));
        // -dir takes exactly one argument (see the synopsis), so the argument count is 1.
        options.add(new Option("\tThe directory to work on.\n\t(default: current directory)", OPTION_DIR, 1, "-dir <directory>"));
        options.add(new Option("\tThe character set to use, e.g UTF-8.\n\t(default: use the default character set)", "charset", 1, "-charset <charset name>"));
        return options.elements();
    }

    /**
     * Configures the loader from command-line style options
     * ({@code -D}, {@code -F}, {@code -dir <directory>}, {@code -charset <name>}).
     *
     * <p>When {@code -dir} is absent the current working directory is used, as
     * advertised by {@link #listOptions()}.
     *
     * @param strArr the options to parse
     * @throws Exception if an option cannot be parsed or the directory is invalid
     */
    @Override
    public void setOptions(String[] strArr) throws Exception {
        setDebug(Utils.getFlag("D", strArr));
        setOutputFilename(Utils.getFlag("F", strArr));

        String directory = Utils.getOption(OPTION_DIR, strArr);
        if (directory.length() == 0) {
            // An absent option yields an empty string; new File("") never exists,
            // so fall back to the documented default explicitly.
            directory = System.getProperty("user.dir");
        }
        setDirectory(new File(directory));

        String charSet = Utils.getOption("charset", strArr);
        this.m_charSet = charSet.length() > 0 ? charSet : "";
    }

    /**
     * Returns the current configuration as an option array that can be fed
     * back into {@link #setOptions(String[])}.
     *
     * @return the current options
     */
    @Override
    public String[] getOptions() {
        List<String> options = new ArrayList<>();
        if (getDebug()) {
            options.add("-D");
        }
        if (getOutputFilename()) {
            options.add("-F");
        }
        options.add("-dir");
        options.add(getDirectory().getAbsolutePath());
        if (this.m_charSet != null && this.m_charSet.length() > 0) {
            options.add("-charset");
            options.add(this.m_charSet);
        }
        return options.toArray(new String[options.size()]);
    }

    /**
     * Returns the tip text for the character set property.
     *
     * @return the tip text
     */
    public String charSetTipText() {
        return "The character set to use when reading text files (eg UTF-8) - leave blank to use the default character set.";
    }

    /**
     * Sets the character set used for reading the text files.
     *
     * @param str the character set name; {@code null} or empty selects the platform default
     */
    public void setCharSet(String str) {
        this.m_charSet = str;
    }

    /**
     * Returns the character set used for reading the text files.
     *
     * @return the character set name, possibly empty
     */
    public String getCharSet() {
        return this.m_charSet;
    }

    /**
     * Turns debug output on or off.
     *
     * @param z {@code true} to print progress information to stderr
     */
    public void setDebug(boolean z) {
        this.m_Debug = z;
    }

    /**
     * Returns whether debug output is enabled.
     *
     * @return {@code true} if debug output is enabled
     */
    public boolean getDebug() {
        return this.m_Debug;
    }

    /**
     * Returns the tip text for the debug property.
     *
     * @return the tip text
     */
    public String debugTipText() {
        return "Whether to print additional debug information to the console.";
    }

    /**
     * Sets whether the file name is stored in an additional attribute.
     * Because this changes the header layout, the loader state is reset.
     *
     * @param z {@code true} to add the {@code filename} attribute
     */
    public void setOutputFilename(boolean z) {
        this.m_OutputFilename = z;
        reset();
    }

    /**
     * Returns whether the file name is stored in an additional attribute.
     *
     * @return {@code true} if the {@code filename} attribute is added
     */
    public boolean getOutputFilename() {
        return this.m_OutputFilename;
    }

    /**
     * Returns the tip text for the output-filename property.
     *
     * @return the tip text
     */
    public String outputFilenameTipText() {
        return "Whether to store the filename in an additional attribute.";
    }

    /**
     * Returns a short description of the kind of source this loader reads.
     *
     * @return the description
     */
    public String getFileDescription() {
        return "Directories";
    }

    /**
     * Returns the directory currently used as source.
     *
     * @return a new {@link File} holding the absolute path of the source directory
     */
    public File getDirectory() {
        return new File(this.m_sourceFile.getAbsolutePath());
    }

    /**
     * Sets the directory to load from; delegates to {@link #setSource(File)}.
     *
     * @param file the directory whose subdirectories are the class labels
     * @throws IOException if the directory is {@code null}, missing, or not a directory
     */
    public void setDirectory(File file) throws IOException {
        setSource(file);
    }

    /**
     * Discards the cached header and all incremental-reading state.
     */
    @Override
    public void reset() {
        this.m_structure = null;
        this.m_filesByClass = null;
        this.m_lastClassDir = 0;
        setRetrieval(0);
    }

    /**
     * Resets the loader and uses the given directory as the new source.
     *
     * <p>The directory is stored before it is validated, so it remains the
     * configured source even when this method throws for a missing directory.
     *
     * @param file the source directory
     * @throws IOException if {@code file} is {@code null}, does not exist, or is not a directory
     */
    @Override
    public void setSource(File file) throws IOException {
        reset();
        if (file == null) {
            throw new IOException("Source directory object is null!");
        }
        this.m_sourceFile = file;
        if (!file.exists() || !file.isDirectory()) {
            throw new IOException("Directory '" + file + "' not found");
        }
    }

    /**
     * Returns the dataset header, building and caching it on first use.
     *
     * <p>The header consists of the string attribute {@code text}, optionally
     * the string attribute {@code filename}, and the nominal class attribute
     * {@code @@class@@} whose labels are the names of the immediate
     * subdirectories. The relation name is the absolute directory path with
     * {@code /}, {@code \} and {@code :} replaced by underscores.
     *
     * @return the cached header object (not a copy)
     * @throws IOException if no directory is set or the directory cannot be listed
     */
    @Override
    public Instances getStructure() throws IOException {
        if (getDirectory() == null) {
            throw new IOException("No directory/source has been specified");
        }
        if (this.m_structure == null) {
            String directoryPath = getDirectory().getAbsolutePath();

            // File.list() returns null when the path is not a listable directory.
            String[] entries = new File(directoryPath).list();
            if (entries == null) {
                throw new IOException("Directory '" + directoryPath + "' could not be listed");
            }

            ArrayList<String> classLabels = new ArrayList<>();
            for (String entry : entries) {
                if (new File(directoryPath + File.separator + entry).isDirectory()) {
                    classLabels.add(entry);
                }
            }

            ArrayList<Attribute> attributes = new ArrayList<>();
            // The null value list is cast to pick the value-list constructor overload.
            attributes.add(new Attribute("text", (ArrayList<String>) null));
            if (this.m_OutputFilename) {
                attributes.add(new Attribute("filename", (ArrayList<String>) null));
            }
            attributes.add(new Attribute("@@class@@", classLabels));

            String relationName = directoryPath.replace('/', '_').replace('\\', '_').replace(':', '_');
            this.m_structure = new Instances(relationName, attributes, 0);
            this.m_structure.setClassIndex(this.m_structure.numAttributes() - 1);
        }
        return this.m_structure;
    }

    /**
     * Loads every entry of every class subdirectory in one go.
     *
     * <p>Entries that cannot be read (including nested subdirectories) are
     * reported on stderr and skipped. The instances are appended to the object
     * returned by {@link #getStructure()}, i.e. to the cached header itself.
     * In this method the {@code filename} attribute holds the path relative to
     * the source directory ({@code <class>/<file>}).
     *
     * @return the header object with all successfully read instances added
     * @throws IOException if no directory is set or the header cannot be built
     */
    @Override
    public Instances getDataSet() throws IOException {
        if (getDirectory() == null) {
            throw new IOException("No directory/source has been specified");
        }
        String directoryPath = getDirectory().getAbsolutePath();

        List<String> classDirs = new ArrayList<>();
        Enumeration<Object> labels = getStructure().classAttribute().enumerateValues();
        while (labels.hasMoreElements()) {
            Object label = labels.nextElement();
            if (label instanceof SerializedObject) {
                classDirs.add(((SerializedObject) label).getObject().toString());
            } else {
                classDirs.add(label.toString());
            }
        }

        Instances data = getStructure();
        int processed = 0;
        for (int classIndex = 0; classIndex < classDirs.size(); classIndex++) {
            String classDir = classDirs.get(classIndex);
            for (String fileName : listOrEmpty(new File(directoryPath + File.separator + classDir))) {
                String filePath = directoryPath + File.separator + classDir + File.separator + fileName;
                try {
                    processed++;
                    if (getDebug()) {
                        System.err.println("processing " + processed + " : " + classDir + " : " + fileName);
                    }
                    double[] values = this.m_OutputFilename ? new double[3] : new double[2];
                    String text = readTextFile(new File(filePath));
                    values[0] = data.attribute(0).addStringValue(text);
                    if (this.m_OutputFilename) {
                        values[1] = data.attribute(1).addStringValue(classDir + File.separator + fileName);
                    }
                    values[data.classIndex()] = classIndex;
                    data.add(new DenseInstance(1.0d, values));
                } catch (Exception e) {
                    // Best effort: a single unreadable entry must not abort the whole load.
                    System.err.println("failed to convert file: " + filePath);
                    if (getDebug()) {
                        e.printStackTrace();
                    }
                }
            }
        }
        return data;
    }

    /**
     * Returns the next instance, cycling through the class directories so that
     * consecutive calls take one file from each class in turn.
     *
     * <p>On the first call after a reset, the regular files of every class
     * directory are collected. In this method the {@code filename} attribute
     * holds the absolute path of the file.
     *
     * @param instances the header the returned instance is attached to; its
     *                  string attributes are overwritten with the values of the
     *                  returned instance
     * @return the next instance, or {@code null} when no files are left
     * @throws IOException if the next file cannot be read
     */
    @Override
    public Instance getNextInstance(Instances instances) throws IOException {
        String directoryPath = getDirectory().getAbsolutePath();
        Attribute classAttribute = instances.classAttribute();

        if (this.m_filesByClass == null) {
            this.m_filesByClass = new ArrayList<>();
            for (int classIndex = 0; classIndex < classAttribute.numValues(); classIndex++) {
                String classDirPath = directoryPath + File.separator + classAttribute.value(classIndex);
                LinkedList<String> pending = new LinkedList<>();
                for (String name : listOrEmpty(new File(classDirPath))) {
                    if (new File(classDirPath + File.separator + name).isFile()) {
                        pending.add(name);
                    }
                }
                this.m_filesByClass.add(pending);
            }
        }

        int numClasses = this.m_filesByClass.size();
        if (numClasses == 0) {
            // No class directories at all: nothing can ever be returned.
            return null;
        }

        // Starting at the current class, look at each class at most once for a pending file.
        LinkedList<String> pendingFiles = null;
        for (int tried = 0; tried < numClasses; tried++) {
            LinkedList<String> candidate = this.m_filesByClass.get(this.m_lastClassDir);
            if (!candidate.isEmpty()) {
                pendingFiles = candidate;
                break;
            }
            this.m_lastClassDir = (this.m_lastClassDir + 1) % numClasses;
        }
        if (pendingFiles == null) {
            return null;
        }

        File file = new File(directoryPath + File.separator + classAttribute.value(this.m_lastClassDir) + File.separator + pendingFiles.poll());
        String text = readTextFile(file);

        double[] values = this.m_OutputFilename ? new double[3] : new double[2];
        // NOTE(review): index 0 presumably refers to the single value stored by
        // setStringValue - confirm against the Attribute API.
        values[0] = 0.0d;
        instances.attribute(0).setStringValue(text);
        if (this.m_OutputFilename) {
            values[1] = 0.0d;
            instances.attribute(1).setStringValue(file.getAbsolutePath());
        }
        values[instances.classIndex()] = this.m_lastClassDir;

        DenseInstance instance = new DenseInstance(1.0d, values);
        instance.setDataset(instances);

        // Move on so the next call starts with the following class.
        this.m_lastClassDir = (this.m_lastClassDir + 1) % numClasses;
        return instance;
    }

    /**
     * Reads the complete content of a file using the configured character set
     * (or the platform default when none is configured). All streams are
     * closed even when reading or decoder setup fails.
     *
     * @param file the file to read
     * @return the file content
     * @throws IOException if the file cannot be opened or read, or the character set is not supported
     */
    private String readTextFile(File file) throws IOException {
        boolean useDefaultCharSet = this.m_charSet == null || this.m_charSet.length() == 0;
        try (FileInputStream in = new FileInputStream(file);
                BufferedReader reader = new BufferedReader(
                        useDefaultCharSet ? new InputStreamReader(in) : new InputStreamReader(in, this.m_charSet))) {
            StringBuilder content = new StringBuilder();
            char[] buffer = new char[READ_BUFFER_SIZE];
            int count;
            while ((count = reader.read(buffer)) != -1) {
                content.append(buffer, 0, count);
            }
            return content.toString();
        }
    }

    /**
     * Lists the entry names of a directory, mapping the {@code null} that
     * {@link File#list()} returns for unlistable paths to an empty array.
     *
     * @param directory the directory to list
     * @return the entry names, never {@code null}
     */
    private static String[] listOrEmpty(File directory) {
        String[] names = directory.list();
        return names != null ? names : new String[0];
    }

    /**
     * Returns the revision string.
     *
     * @return the revision
     */
    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 12184 $");
    }

    /**
     * Command-line entry point; see {@link #run(Object, String[])}.
     *
     * @param strArr the command-line options
     */
    public static void main(String[] strArr) {
        TextDirectoryLoader loader = new TextDirectoryLoader();
        loader.run(loader, strArr);
    }

    /**
     * Hook executed before {@link #run(Object, String[])}; does nothing.
     *
     * @throws Exception never thrown by this implementation
     */
    @Override
    public void preExecution() throws Exception {
    }

    /**
     * Hook executed after {@link #run(Object, String[])}; does nothing.
     *
     * @throws Exception never thrown by this implementation
     */
    @Override
    public void postExecution() throws Exception {
    }

    /**
     * Configures the given loader from the options and prints the header
     * followed by every instance (read incrementally) to stdout. Without any
     * options, a usage message is printed to stderr instead. Failures while
     * loading are printed as a stack trace rather than propagated.
     *
     * @param obj    the loader to run; must be a {@code TextDirectoryLoader}
     * @param strArr the command-line options
     * @throws IllegalArgumentException if {@code obj} is not a {@code TextDirectoryLoader}
     */
    @Override
    public void run(Object obj, String[] strArr) throws IllegalArgumentException {
        if (!(obj instanceof TextDirectoryLoader)) {
            throw new IllegalArgumentException("Object to execute is not a TextDirectoryLoader!");
        }
        TextDirectoryLoader loader = (TextDirectoryLoader) obj;

        if (strArr.length <= 0) {
            System.err.println("\nUsage:\n\tTextDirectoryLoader [options]\n\nOptions:\n");
            Enumeration<Option> options = new TextDirectoryLoader().listOptions();
            while (options.hasMoreElements()) {
                Option option = options.nextElement();
                System.err.println(option.synopsis());
                System.err.println(option.description());
            }
            System.err.println();
            return;
        }

        try {
            loader.setOptions(strArr);
            Instances structure = loader.getStructure();
            System.out.println(structure);
            Instance next = loader.getNextInstance(structure);
            while (next != null) {
                System.out.println(next);
                next = loader.getNextInstance(structure);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
