package org.cogroo.analyzer;

import java.util.ArrayList;
import java.util.regex.Pattern;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.util.Span;
import org.cogroo.text.Document;
import org.cogroo.text.Sentence;
import org.cogroo.text.impl.TokenImpl;

/* loaded from: input_file:org/cogroo/analyzer/Tokenizer.class */
/**
 * {@link Analyzer} that splits every sentence of a {@link Document} into tokens
 * using an OpenNLP {@link TokenizerME}.
 *
 * <p>Typographic quotation marks are replaced by a plain ASCII double quote before
 * the text is handed to the tokenizer. Each replacement swaps exactly one character
 * for one character, so the spans returned for the normalized text are also valid
 * offsets into the original sentence text.
 *
 * <p>All calls into the wrapped tokenizer are serialized on the tokenizer instance
 * itself. NOTE(review): presumably this is because {@code TokenizerME} is not
 * thread-safe; any other code sharing the same instance must lock on it as well.
 */
public class Tokenizer implements Analyzer {
    /** Opening typographic quotation marks that are normalized to {@code "}. */
    private static final Pattern OPEN_QUOTATION = Pattern.compile("[«“]");
    /** Closing typographic quotation marks that are normalized to {@code "}. */
    private static final Pattern CLOSE_QUOTATION = Pattern.compile("[»”]");

    /**
     * Wrapped OpenNLP tokenizer. It doubles as the lock monitor in
     * {@link #analyze(Document)}, so it must be final: locking on a reassignable
     * field would not guarantee mutual exclusion.
     */
    private final TokenizerME tokenizer;

    /**
     * Creates an analyzer backed by the given OpenNLP tokenizer.
     *
     * @param tokenizerME the tokenizer used to compute token spans
     */
    public Tokenizer(TokenizerME tokenizerME) {
        this.tokenizer = tokenizerME;
    }

    /**
     * Tokenizes each sentence of the document and stores the resulting tokens on
     * the sentence via {@code setTokens}.
     *
     * @param document the document whose sentences are tokenized in place
     */
    @Override
    public void analyze(Document document) {
        for (Sentence sentence : document.getSentences()) {
            String text = sentence.getText();
            // Normalize outside the critical section; only the tokenizer call needs the lock.
            String normalized = preprocess(text);

            Span[] spans;
            synchronized (this.tokenizer) {
                spans = this.tokenizer.tokenizePos(normalized);
            }

            // Raw type kept on purpose: the element type expected by Sentence.setTokens
            // is not visible from this file. TODO(review): parameterize once confirmed.
            ArrayList tokens = new ArrayList(spans.length);
            for (Span span : spans) {
                // The covered text is taken from the ORIGINAL sentence text, so tokens
                // keep the typographic quotes that were only normalized for tokenizing.
                String lexeme = span.getCoveredText(text).toString();
                tokens.add(new TokenImpl(span.getStart(), span.getEnd(), lexeme));
            }
            sentence.setTokens(tokens);
        }
    }

    /**
     * Replaces opening and closing typographic quotation marks with an ASCII
     * double quote, leaving the length of the string unchanged.
     *
     * @param str the raw sentence text
     * @return the text with quotation marks normalized
     */
    private String preprocess(String str) {
        String openNormalized = OPEN_QUOTATION.matcher(str).replaceAll("\"");
        return CLOSE_QUOTATION.matcher(openNormalized).replaceAll("\"");
    }
}
