package org.cogroo.formats.ad;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.atomic.AtomicInteger;
import opennlp.tools.chunker.ChunkSample;
import opennlp.tools.formats.ad.ADSentenceStream;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Span;
import org.cogroo.tools.featurizer.WordTag;
import org.cogroo.tools.shallowparser.ShallowParserSequenceValidator;

/* loaded from: input_file:org/cogroo/formats/ad/ADChunkBasedShallowParserSampleStream.class */
/**
 * Chunk sample stream that walks every sentence tree twice.
 *
 * <p>The first pass delegates chunk tagging to {@link ADChunk2SampleStream} and keeps the
 * resulting chunk tags in {@link #chunks}. The second pass tags tokens with syntactic function
 * tags (restricted to {@link #functTagSet}). The emitted {@link ChunkSample} carries the
 * second-pass tags joined with the first-pass chunk tags as {@code tag|chunk}, and the
 * second-pass targets as predictions.
 */
public class ADChunkBasedShallowParserSampleStream extends ADChunk2SampleStream {
    /** Function tags used when the caller supplies no (or a blank) tag list. */
    private static final String[] DEFAULT_FUNCT_TAGS = {
        "SUBJ", "ACC", "DAT", "PIV", "ADVS", "ADVO", "SC", "OC", "P", "NPHR", "SA", "ADVL", "APP"
    };

    /** Unmodifiable set of syntactic function tags this stream emits. */
    private final Set<String> functTagSet;

    /** {@code true} during the first (chunk) pass, {@code false} during the function-tag pass. */
    private boolean readChunk;

    private final ShallowParserSequenceValidator sv = new ShallowParserSequenceValidator();

    /** Chunk tags collected by the first pass over the current sentence, one per token. */
    private ArrayList<String> chunks;

    private final SubjectTypes subjectTypes = new SubjectTypes();

    /**
     * Collects statistics about the chunk patterns found inside {@code SUBJ} phrases.
     *
     * <p>JADX INFO: Access modifiers changed from: package-private.
     */
    public static class SubjectTypes {
        /** Number of occurrences per chunk pattern (space-separated chunk types). */
        private final Map<String, AtomicInteger> subjects = new HashMap<>();

        /** First sentence fragment seen for each chunk pattern. */
        private final Map<String, String> examples = new HashMap<>();

        SubjectTypes() {
        }

        /**
         * Records the chunk pattern of every {@code SUBJ} phrase in the sample.
         *
         * @param chunkSample sample whose tags have the form {@code tag|chunk}
         */
        public void add(ChunkSample chunkSample) {
            for (Span span : chunkSample.getPhrasesAsSpanList()) {
                if (!span.getType().equals("SUBJ")) {
                    continue;
                }
                String[] chunkTags =
                    extractChunk(Arrays.copyOfRange(chunkSample.getTags(), span.getStart(), span.getEnd()));
                // Only the third argument (the predictions) determines the resulting spans' types.
                Span[] chunkSpans = ChunkSample.phrasesAsSpanList(chunkTags, chunkTags, chunkTags);
                StringBuilder pattern = new StringBuilder();
                for (Span chunkSpan : chunkSpans) {
                    pattern.append(chunkSpan.getType()).append(" ");
                }
                String key = pattern.toString().trim();
                AtomicInteger counter = this.subjects.get(key);
                if (counter != null) {
                    counter.incrementAndGet();
                } else {
                    this.subjects.put(key, new AtomicInteger(1));
                    this.examples.put(key,
                        Arrays.toString(Arrays.copyOfRange(chunkSample.getSentence(), span.getStart(), span.getEnd())));
                }
            }
        }

        /**
         * Prints every recorded pattern to standard output, ordered by ascending count.
         *
         * <p>Patterns with equal counts are ordered lexicographically. The tie-break is required:
         * a {@link TreeSet} treats elements whose comparator result is {@code 0} as duplicates, so
         * comparing by count alone would silently drop all but one pattern per count value.
         */
        public void print() {
            TreeSet<String> ordered = new TreeSet<>(new Comparator<String>() {
                @Override
                public int compare(String left, String right) {
                    int byCount = Integer.compare(
                        SubjectTypes.this.subjects.get(left).intValue(),
                        SubjectTypes.this.subjects.get(right).intValue());
                    return byCount != 0 ? byCount : left.compareTo(right);
                }
            });
            ordered.addAll(this.subjects.keySet());
            for (String pattern : ordered) {
                System.out.println(pattern + " -> " + this.subjects.get(pattern) + "->" + this.examples.get(pattern));
            }
        }

        /** Applies {@link #extractChunk(String)} to every element, returning a new array. */
        private String[] extractChunk(String[] tags) {
            String[] result = new String[tags.length];
            for (int i = 0; i < tags.length; i++) {
                result[i] = extractChunk(tags[i]);
            }
            return result;
        }

        /** Returns the part after the first {@code '|'}, or the whole string if there is none. */
        private String extractChunk(String tag) {
            return tag.substring(tag.indexOf('|') + 1);
        }
    }

    /**
     * Creates a stream over an already opened line stream.
     *
     * @param objectStream line stream handed to the superclass
     * @param str comma-separated function tags to emit; {@code null} or blank selects the defaults
     * @param z unused by this class
     * @param z2 unused by this class
     * @param z3 unused by this class
     */
    public ADChunkBasedShallowParserSampleStream(ObjectStream<String> objectStream, String str, boolean z, boolean z2, boolean z3) {
        super(objectStream);
        this.functTagSet = buildFunctTagSet(str);
    }

    /**
     * Creates a stream over an input stream factory.
     *
     * @param inputStreamFactory source handed to the superclass
     * @param str second superclass argument (presumably the charset name; confirm against
     *     {@link ADChunk2SampleStream})
     * @param str2 comma-separated function tags to emit; {@code null} or blank selects the defaults
     * @param z unused by this class
     * @param z2 unused by this class
     * @param z3 unused by this class
     * @throws IOException propagated from the superclass constructor
     */
    public ADChunkBasedShallowParserSampleStream(InputStreamFactory inputStreamFactory, String str, String str2, boolean z, boolean z2, boolean z3) throws IOException {
        super(inputStreamFactory, str);
        this.functTagSet = buildFunctTagSet(str2);
    }

    /**
     * Builds the unmodifiable function-tag set from a comma-separated list.
     *
     * <p>Each entry is trimmed so that lists written as {@code "SUBJ, ACC"} match the same tags
     * as {@code "SUBJ,ACC"}.
     */
    private static Set<String> buildFunctTagSet(String commaSeparatedTags) {
        String[] tags;
        if (commaSeparatedTags == null || commaSeparatedTags.trim().isEmpty()) {
            tags = DEFAULT_FUNCT_TAGS;
        } else {
            tags = commaSeparatedTags.split(",");
        }
        Set<String> tagSet = new HashSet<>();
        for (String tag : tags) {
            tagSet.add(tag.trim());
        }
        return Collections.unmodifiableSet(tagSet);
    }

    /**
     * Reads the next sentence that yields at least one token and converts it to a sample.
     *
     * <p>Every outcome is checked with the sequence validator; invalid outcomes are only reported
     * on standard output, the sample is still returned.
     *
     * @return the next sample, or {@code null} when the underlying sentence stream is exhausted
     * @throws IOException if reading the underlying stream fails
     */
    @Override
    /* renamed from: read */
    public ChunkSample mo5read() throws IOException {
        ArrayList<String> tokens;
        ArrayList<String> tags;
        ArrayList<String> functionTags;
        do {
            ADSentenceStream.Sentence sentence = (ADSentenceStream.Sentence) this.adSentenceStream.read();
            if (sentence == null) {
                return null;
            }
            ADSentenceStream.SentenceParser.Node root = sentence.getRoot();
            tokens = new ArrayList<>();
            tags = new ArrayList<>();

            // Pass 1: superclass chunk tagging; only the targets (this.chunks) are kept.
            this.readChunk = true;
            this.chunks = new ArrayList<>();
            processRoot(root, tokens, tags, this.chunks);

            // Pass 2: function tagging; getChunkTag consults this.chunks from pass 1.
            this.readChunk = false;
            tokens.clear();
            tags.clear();
            functionTags = new ArrayList<>();
            processRoot(root, tokens, tags, functionTags);

            // NOTE(review): assumes both passes produce the same number of tokens; otherwise
            // this.chunks.get(i) misaligns or throws IndexOutOfBoundsException.
            for (int i = 0; i < tags.size(); i++) {
                tags.set(i, tags.get(i) + "|" + this.chunks.get(i));
            }
        } while (tokens.isEmpty());

        ChunkSample chunkSample = new ChunkSample(tokens, tags, functionTags);
        // Loop-invariant: the sample does not change while outcomes are validated.
        WordTag[] wordTags = WordTag.create(chunkSample);
        for (int i = 0; i < tokens.size(); i++) {
            String[] previousOutcomes = functionTags.subList(0, i).toArray(new String[i]);
            if (!this.sv.validSequence(i, wordTags, previousOutcomes, functionTags.get(i))) {
                System.out.println("failed, invalid outcome: " + functionTags.get(i));
            }
        }
        return chunkSample;
    }

    /**
     * Returns the tag for a leaf: the superclass tag during the chunk pass, otherwise the leaf's
     * syntactic tag if it is one of the configured function tags, else {@code null}.
     *
     * <p>JADX INFO: Access modifiers changed from: protected.
     */
    @Override
    public String getChunkTag(ADSentenceStream.SentenceParser.Leaf leaf) {
        if (this.readChunk) {
            return super.getChunkTag(leaf);
        }
        String syntacticTag = leaf.getSyntacticTag();
        return this.functTagSet.contains(syntacticTag) ? syntacticTag : null;
    }

    /**
     * Returns the tag for a node: the superclass tag during the chunk pass, otherwise the node's
     * function tag when it may be emitted, else {@link ADChunk2SampleStream#OTHER}.
     *
     * <p>During the function-tag pass the function tag is the text before the last {@code ':'} of
     * the node's syntactic tag. It is emitted only when it is a configured function tag, differs
     * from {@code str}, and the node's leaves start and end on first-pass chunk boundaries, so a
     * function span never splits a chunk.
     *
     * <p>JADX INFO: Access modifiers changed from: protected.
     *
     * @param node node being tagged
     * @param str tag the result is compared against (presumably the enclosing tag; confirm
     *     against the superclass)
     * @param i index into {@link #chunks} of the node's first leaf
     */
    @Override
    public String getChunkTag(ADSentenceStream.SentenceParser.Node node, String str, int i) {
        if (this.readChunk) {
            return super.getChunkTag(node, str, i);
        }
        String syntacticTag = node.getSyntacticTag();
        // NOTE(review): throws StringIndexOutOfBoundsException when the tag contains no ':'.
        String functionTag = syntacticTag.substring(0, syntacticTag.lastIndexOf(":"));
        if (!this.functTagSet.contains(functionTag)) {
            functionTag = ADChunk2SampleStream.OTHER;
        }
        if (functionTag.equals(str)) {
            return ADChunk2SampleStream.OTHER;
        }
        if (functionTag.equals(ADChunk2SampleStream.OTHER)) {
            return functionTag;
        }
        int leafCount = countLeafs(node);
        if (!isChunkBoundary(this.chunks.get(i))) {
            return ADChunk2SampleStream.OTHER;
        }
        int endIndex = i + leafCount;
        if (this.chunks.size() == endIndex) {
            // The node reaches the end of the sentence; there is no following chunk to split.
            return functionTag;
        }
        return isChunkBoundary(this.chunks.get(endIndex)) ? functionTag : ADChunk2SampleStream.OTHER;
    }

    /** Tells whether a first-pass chunk tag is outside any chunk or begins a new one. */
    private static boolean isChunkBoundary(String chunkTag) {
        return chunkTag.equals(ADChunk2SampleStream.OTHER) || chunkTag.startsWith("B-");
    }

    /** Counts the leaves below {@code node}, recursing into child nodes. */
    private int countLeafs(ADSentenceStream.SentenceParser.Node node) {
        int count = 0;
        for (ADSentenceStream.SentenceParser.TreeElement element : node.getElements()) {
            if (element.isLeaf()) {
                count++;
            } else {
                count += countLeafs((ADSentenceStream.SentenceParser.Node) element);
            }
        }
        return count;
    }

    /** Always returns {@link ADChunk2SampleStream#OTHER}, regardless of the POS tag. */
    @Override
    protected String getPhraseTagFromPosTag(String str) {
        return ADChunk2SampleStream.OTHER;
    }

    /**
     * Defers to the superclass during the chunk pass and returns {@code true} during the
     * function-tag pass.
     *
     * <p>JADX INFO: Access modifiers changed from: protected.
     */
    @Override
    public boolean isIncludePunctuations() {
        return this.readChunk ? super.isIncludePunctuations() : true;
    }
}
