/*
 * Decompiled with CFR 0.152.
 */
package edu.udo.cs.wvtool.main;

import edu.udo.cs.wvtool.config.WVTConfiguration;
import edu.udo.cs.wvtool.config.WVTConfigurationFact;
import edu.udo.cs.wvtool.generic.charmapper.WVTCharConverter;
import edu.udo.cs.wvtool.generic.inputfilter.WVTInputFilter;
import edu.udo.cs.wvtool.generic.loader.WVTDocumentLoader;
import edu.udo.cs.wvtool.generic.output.WVTOutputFilter;
import edu.udo.cs.wvtool.generic.stemmer.WVTStemmer;
import edu.udo.cs.wvtool.generic.tokenizer.WVTTokenizer;
import edu.udo.cs.wvtool.generic.vectorcreation.TFIDF;
import edu.udo.cs.wvtool.generic.vectorcreation.WVTVectorCreator;
import edu.udo.cs.wvtool.generic.wordfilter.WVTWordFilter;
import edu.udo.cs.wvtool.main.WVTDocumentInfo;
import edu.udo.cs.wvtool.main.WVTInputList;
import edu.udo.cs.wvtool.main.WVTWordVector;
import edu.udo.cs.wvtool.main.WVToolWordListener;
import edu.udo.cs.wvtool.util.TokenEnumeration;
import edu.udo.cs.wvtool.util.WVToolException;
import edu.udo.cs.wvtool.util.WVToolLogger;
import edu.udo.cs.wvtool.wordlist.WVTWordList;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;

/**
 * Runs the processing steps configured in a {@link WVTConfiguration} over
 * documents: loading, plain-text conversion, character mapping, tokenizing,
 * word filtering and stemming. On top of that token stream it can build a
 * {@link WVTWordList}, create word vectors, or feed tokens to a
 * {@link WVToolWordListener}.
 *
 * <p>The component used for each step is looked up per document through
 * {@link WVTConfiguration#getComponentForStep}.
 */
public class WVTool {
    /** Lower prune bound used when the caller does not supply one. */
    private static final int DEFAULT_PRUNE_MIN = 4;
    /** Upper prune bound used when the caller does not supply one. */
    private static final int DEFAULT_PRUNE_MAX = 2000;

    /** Step names as understood by {@link WVTConfiguration#getComponentForStep}. */
    private static final String STEP_LOADER = "loader";
    private static final String STEP_INPUT_FILTER = "inputfilter";
    private static final String STEP_CHAR_MAPPER = "charmapper";
    private static final String STEP_TOKENIZER = "tokenizer";
    private static final String STEP_WORD_FILTER = "wordfilter";
    private static final String STEP_STEMMER = "stemmer";
    private static final String STEP_VECTOR_CREATION = "vectorcreation";
    private static final String STEP_OUTPUT = "output";

    /** When true, a failing document is logged and skipped instead of aborting the run. */
    private final boolean skipErrors;

    /**
     * @param skipErrors if {@code true}, a {@link WVToolException} raised while
     *        processing a document is logged and processing continues; if
     *        {@code false}, it is logged and rethrown wrapped in a new
     *        {@link WVToolException}
     */
    public WVTool(boolean skipErrors) {
        this.skipErrors = skipErrors;
    }

    /**
     * Loads a document with the loader configured for it.
     *
     * @param docInfo the document to load
     * @param config  configuration used to look up the "loader" step
     * @return the stream returned by the loader's {@code loadDocument}
     * @throws WVToolException if the lookup or the load fails
     */
    public InputStream getInputStream(WVTDocumentInfo docInfo, WVTConfiguration config) throws WVToolException {
        WVTDocumentLoader loader = (WVTDocumentLoader) config.getComponentForStep(STEP_LOADER, docInfo);
        return loader.loadDocument(docInfo);
    }

    /**
     * Loads a document and converts it to plain text using the configured
     * "loader" and "inputfilter" components.
     *
     * @param docInfo the document to read
     * @param config  configuration used to look up both steps
     * @return the reader produced by the input filter
     * @throws WVToolException if a lookup, the load or the conversion fails
     */
    public Reader getReader(WVTDocumentInfo docInfo, WVTConfiguration config) throws WVToolException {
        WVTDocumentLoader loader = (WVTDocumentLoader) config.getComponentForStep(STEP_LOADER, docInfo);
        WVTInputFilter inputFilter = (WVTInputFilter) config.getComponentForStep(STEP_INPUT_FILTER, docInfo);
        return inputFilter.convertToPlainText(loader.loadDocument(docInfo), docInfo);
    }

    /**
     * Builds a word list from all documents of {@code input}, starting from an
     * empty list and appending every word encountered.
     *
     * @param input  the documents to process
     * @param config configuration supplying the processing components
     * @return the populated word list
     * @throws WVToolException if a document fails and errors are not skipped
     */
    public WVTWordList createWordList(WVTInputList input, WVTConfiguration config) throws WVToolException {
        return this.createWordList(input, config, new LinkedList(), true);
    }

    /**
     * Builds a word list from all documents of {@code input}.
     *
     * @param input        the documents to process
     * @param config       configuration supplying the processing components
     * @param initialWords list handed to the {@link WVTWordList} constructor
     *                     (presumably the initial vocabulary; confirm against
     *                     {@code WVTWordList})
     * @param appendWords  value passed to {@code WVTWordList.setAppendWords}
     * @return the populated word list
     * @throws WVToolException if a document fails and errors are not skipped
     */
    public WVTWordList createWordList(WVTInputList input, WVTConfiguration config, List initialWords, boolean appendWords) throws WVToolException {
        WVTWordList wordList = new WVTWordList(initialWords, input.getNumClasses());
        wordList.setAppendWords(appendWords);
        wordList.setUpdateOnlyCurrent(false);

        Iterator entries = input.getEntries();
        while (entries.hasNext()) {
            WVTDocumentInfo docInfo = (WVTDocumentInfo) entries.next();
            // Scoped per document so the error path never touches the loader
            // of a previous document (or null on the very first one).
            WVTDocumentLoader loader = null;
            try {
                loader = (WVTDocumentLoader) config.getComponentForStep(STEP_LOADER, docInfo);
                WVTInputFilter inputFilter = (WVTInputFilter) config.getComponentForStep(STEP_INPUT_FILTER, docInfo);
                TokenPipeline pipeline = new TokenPipeline(config, docInfo);

                Reader plainText = inputFilter.convertToPlainText(loader.loadDocument(docInfo), docInfo);
                addTokens(wordList, pipeline.run(plainText, docInfo));
                wordList.closeDocument(docInfo);
                loader.close(docInfo);
            } catch (WVToolException e) {
                // NOTE(review): closeDocument is not called for a failed document,
                // as before; confirm WVTWordList does not carry partial counts over.
                handleFailure(docInfo, loader, e);
            }
        }
        return wordList;
    }

    /**
     * Builds a word list from {@code input}, prunes it by frequency and then
     * writes one vector per document.
     *
     * @param input    the documents to process
     * @param config   configuration supplying the processing components
     * @param pruneMin first argument for {@code WVTWordList.pruneByFrequency}
     * @param pruneMax second argument for {@code WVTWordList.pruneByFrequency}
     * @throws WVToolException if a document fails and errors are not skipped
     */
    public void createVectors(WVTInputList input, WVTConfiguration config, int pruneMin, int pruneMax) throws WVToolException {
        WVTWordList wordList = this.createWordList(input, config);
        // Honour the caller's bounds; previously the defaults were always used.
        wordList.pruneByFrequency(pruneMin, pruneMax);
        this.createVectors(input, config, wordList);
    }

    /**
     * Same as {@link #createVectors(WVTInputList, WVTConfiguration, int, int)}
     * with the default prune bounds.
     *
     * @param input  the documents to process
     * @param config configuration supplying the processing components
     * @throws WVToolException if a document fails and errors are not skipped
     */
    public void createVectors(WVTInputList input, WVTConfiguration config) throws WVToolException {
        this.createVectors(input, config, DEFAULT_PRUNE_MIN, DEFAULT_PRUNE_MAX);
    }

    /**
     * Creates a vector for every document of {@code input} against an existing
     * word list and hands it to the configured "output" component.
     *
     * <p>The word list is switched to non-appending, update-only-current mode
     * before processing.
     *
     * @param input    the documents to process
     * @param config   configuration supplying the processing components
     * @param wordList the word list the vectors are based on
     * @throws WVToolException if a document fails and errors are not skipped
     */
    public void createVectors(WVTInputList input, WVTConfiguration config, WVTWordList wordList) throws WVToolException {
        wordList.setAppendWords(false);
        wordList.setUpdateOnlyCurrent(true);

        Iterator entries = input.getEntries();
        while (entries.hasNext()) {
            WVTDocumentInfo docInfo = (WVTDocumentInfo) entries.next();
            // Scoped per document; see createWordList.
            WVTDocumentLoader loader = null;
            try {
                // All components are resolved before the document is opened.
                loader = (WVTDocumentLoader) config.getComponentForStep(STEP_LOADER, docInfo);
                WVTInputFilter inputFilter = (WVTInputFilter) config.getComponentForStep(STEP_INPUT_FILTER, docInfo);
                TokenPipeline pipeline = new TokenPipeline(config, docInfo);
                WVTVectorCreator vectorCreator = (WVTVectorCreator) config.getComponentForStep(STEP_VECTOR_CREATION, docInfo);
                WVTOutputFilter outputFilter = (WVTOutputFilter) config.getComponentForStep(STEP_OUTPUT, docInfo);

                Reader plainText = inputFilter.convertToPlainText(loader.loadDocument(docInfo), docInfo);
                addTokens(wordList, pipeline.run(plainText, docInfo));
                outputFilter.write(vectorCreator.createVector(wordList.getFrequenciesForCurrentDocument(), wordList.getTermCountForCurrentDocument(), wordList, docInfo));
                wordList.closeDocument(docInfo);
                loader.close(docInfo);
            } catch (WVToolException e) {
                // NOTE(review): closeDocument is not called for a failed document,
                // as before; confirm partial counts do not leak into the next vector.
                handleFailure(docInfo, loader, e);
            }
        }
    }

    /**
     * Creates a vector for a text given directly as a string; no loader or
     * input filter is involved.
     *
     * <p>The word list is switched to non-appending, update-only-current mode
     * before processing.
     *
     * @param text     the text to vectorize
     * @param docInfo  document info passed to every processing component
     * @param config   configuration supplying the processing components
     * @param wordList the word list the vector is based on
     * @return the created vector, or {@code null} if processing failed before
     *         the vector was created and errors are skipped
     * @throws WVToolException if processing fails and errors are not skipped
     */
    public WVTWordVector createVector(String text, WVTDocumentInfo docInfo, WVTConfiguration config, WVTWordList wordList) throws WVToolException {
        wordList.setAppendWords(false);
        wordList.setUpdateOnlyCurrent(true);

        WVTWordVector vector = null;
        try {
            TokenPipeline pipeline = new TokenPipeline(config, docInfo);
            WVTVectorCreator vectorCreator = (WVTVectorCreator) config.getComponentForStep(STEP_VECTOR_CREATION, docInfo);

            addTokens(wordList, pipeline.run(new StringReader(text), docInfo));
            vector = vectorCreator.createVector(wordList.getFrequenciesForCurrentDocument(), wordList.getTermCountForCurrentDocument(), wordList, docInfo);
            wordList.closeDocument(docInfo);
        } catch (WVToolException e) {
            // No loader was opened for in-memory text, so there is nothing to close.
            handleFailure(docInfo, null, e);
        }
        return vector;
    }

    /**
     * Creates a vector for a text using a fresh configuration whose
     * "vectorcreation" step is set to {@link TFIDF}, and a document info built
     * from four empty strings.
     *
     * @param text     the text to vectorize
     * @param wordList the word list the vector is based on
     * @return the created vector, or {@code null} if processing failed and
     *         errors are skipped
     * @throws WVToolException if processing fails and errors are not skipped
     */
    public WVTWordVector createVector(String text, WVTWordList wordList) throws WVToolException {
        WVTConfiguration config = new WVTConfiguration();
        config.setConfigurationRule(STEP_VECTOR_CREATION, new WVTConfigurationFact(new TFIDF()));
        return this.createVector(text, new WVTDocumentInfo("", "", "", ""), config, wordList);
    }

    /**
     * Runs the token pipeline over every document of {@code input} and reports
     * each token to {@code listener}. The listener's {@code openNewDocument} is
     * called before the document is processed, even if processing then fails.
     *
     * @param input    the documents to process
     * @param config   configuration supplying the processing components
     * @param listener receiver of document-start and word events
     * @throws WVToolException if a document fails and errors are not skipped
     */
    public void iterateWords(WVTInputList input, WVTConfiguration config, WVToolWordListener listener) throws WVToolException {
        Iterator entries = input.getEntries();
        while (entries.hasNext()) {
            WVTDocumentInfo docInfo = (WVTDocumentInfo) entries.next();
            listener.openNewDocument(docInfo);
            // Scoped per document; see createWordList.
            WVTDocumentLoader loader = null;
            try {
                loader = (WVTDocumentLoader) config.getComponentForStep(STEP_LOADER, docInfo);
                WVTInputFilter inputFilter = (WVTInputFilter) config.getComponentForStep(STEP_INPUT_FILTER, docInfo);
                TokenPipeline pipeline = new TokenPipeline(config, docInfo);

                TokenEnumeration tokens = pipeline.run(inputFilter.convertToPlainText(loader.loadDocument(docInfo), docInfo), docInfo);
                while (tokens.hasMoreTokens()) {
                    listener.processWord(tokens.nextToken());
                }
                loader.close(docInfo);
            } catch (WVToolException e) {
                handleFailure(docInfo, loader, e);
            }
        }
    }

    /** Feeds every remaining token of {@code tokens} into {@code wordList}. */
    private static void addTokens(WVTWordList wordList, TokenEnumeration tokens) throws WVToolException {
        while (tokens.hasMoreTokens()) {
            wordList.addWordOccurance(tokens.nextToken());
        }
    }

    /**
     * Shared error path for one document: logs the failure, closes the loader
     * if one was obtained, and rethrows unless errors are skipped.
     *
     * @param docInfo the document that failed
     * @param loader  the loader obtained for this document, or {@code null} if
     *                none was obtained (or none is used)
     * @param cause   the exception that interrupted processing
     * @throws WVToolException wrapping {@code cause} when errors are not skipped
     */
    private void handleFailure(WVTDocumentInfo docInfo, WVTDocumentLoader loader, WVToolException cause) throws WVToolException {
        String message = "Problems processing document " + docInfo.getSourceName();
        WVToolLogger.getGlobalLogger().logException(message, cause);
        // NOTE(review): if the failure came from loader.close itself, close is
        // attempted a second time here, exactly as the previous code did.
        if (loader != null) {
            loader.close(docInfo);
        }
        if (!this.skipErrors) {
            throw new WVToolException(message, cause);
        }
    }

    /**
     * The text-side processing components for one document (character mapper,
     * tokenizer, word filter, stemmer), resolved together so callers can look
     * everything up before any input is read.
     */
    private static final class TokenPipeline {
        private final WVTCharConverter charConverter;
        private final WVTTokenizer tokenizer;
        private final WVTWordFilter wordFilter;
        private final WVTStemmer stemmer;

        /** Looks up the four components, in pipeline order, for {@code docInfo}. */
        TokenPipeline(WVTConfiguration config, WVTDocumentInfo docInfo) throws WVToolException {
            this.charConverter = (WVTCharConverter) config.getComponentForStep(STEP_CHAR_MAPPER, docInfo);
            this.tokenizer = (WVTTokenizer) config.getComponentForStep(STEP_TOKENIZER, docInfo);
            this.wordFilter = (WVTWordFilter) config.getComponentForStep(STEP_WORD_FILTER, docInfo);
            this.stemmer = (WVTStemmer) config.getComponentForStep(STEP_STEMMER, docInfo);
        }

        /** Chains character mapping, tokenizing, filtering and stemming over {@code text}. */
        TokenEnumeration run(Reader text, WVTDocumentInfo docInfo) throws WVToolException {
            return this.stemmer.stem(this.wordFilter.filter(this.tokenizer.tokenize(this.charConverter.convertChars(text, docInfo), docInfo), docInfo), docInfo);
        }
    }
}

