/*
 * Decompiled with CFR 0.152.
 */
package smile.feature.extraction;

import java.util.Objects;
import java.util.TreeMap;
import java.util.function.Function;
import smile.hash.MurmurHash3;
import smile.util.SparseArray;

/**
 * Encodes a text as a sparse vector of hashed token counts.
 *
 * <p>The text is split by the supplied tokenizer, each token is hashed with
 * {@code MurmurHash3.hash32(token, 0)}, and the hash is folded into a bucket
 * index in {@code [0, numFeatures)}. The contribution of every token
 * ({@code +1}, or {@code -1} for negative hashes when sign alternation is
 * enabled) is summed per bucket.
 */
public class HashEncoder implements Function<String, SparseArray> {
    /** Splits an input text into the tokens that get hashed. */
    private final Function<String, String[]> tokenizer;
    /** Number of hash buckets; validated to be positive at construction. */
    private final int numFeatures;
    /** When true, a token whose hash is negative contributes -1 instead of +1. */
    private final boolean alternateSign;

    /**
     * Creates an encoder with sign alternation enabled.
     *
     * @param tokenizer   splits a text into tokens; must not be null.
     * @param numFeatures the number of hash buckets; must be positive.
     * @throws NullPointerException     if {@code tokenizer} is null.
     * @throws IllegalArgumentException if {@code numFeatures} is not positive.
     */
    public HashEncoder(Function<String, String[]> tokenizer, int numFeatures) {
        this(tokenizer, numFeatures, true);
    }

    /**
     * Creates an encoder.
     *
     * @param tokenizer     splits a text into tokens; must not be null.
     * @param numFeatures   the number of hash buckets; must be positive.
     * @param alternateSign if true, tokens with a negative hash are counted
     *                      as -1 rather than +1.
     * @throws NullPointerException     if {@code tokenizer} is null.
     * @throws IllegalArgumentException if {@code numFeatures} is not positive.
     */
    public HashEncoder(Function<String, String[]> tokenizer, int numFeatures, boolean alternateSign) {
        // Fail fast: a zero bucket count would otherwise surface as an
        // ArithmeticException ("/ by zero") on the first token in apply(),
        // and a negative one can produce negative bucket indices.
        if (numFeatures <= 0) {
            throw new IllegalArgumentException("Invalid number of features: " + numFeatures);
        }
        this.tokenizer = Objects.requireNonNull(tokenizer, "tokenizer");
        this.numFeatures = numFeatures;
        this.alternateSign = alternateSign;
    }

    /**
     * Hashes the tokens of a text into a sparse feature vector.
     *
     * @param text the text to encode; passed as-is to the tokenizer.
     * @return a sparse array with one appended entry per hit bucket, in
     *         ascending bucket order. A bucket whose signed counts cancel out
     *         is still passed to {@code append} with value 0.
     */
    @Override
    public SparseArray apply(String text) {
        // TreeMap iterates in ascending key order, so buckets are appended
        // to the result sorted by index.
        TreeMap<Integer, Integer> bag = new TreeMap<>();
        for (String word : tokenizer.apply(text)) {
            int h = MurmurHash3.hash32(word, 0);
            int value = alternateSign && h < 0 ? -1 : 1;
            bag.merge(bucket(h), value, Integer::sum);
        }

        SparseArray features = new SparseArray();
        bag.forEach((index, count) -> features.append(index, count));
        return features;
    }

    /**
     * Folds a 32-bit hash into a bucket index in {@code [0, numFeatures)}.
     *
     * @param h the token hash.
     * @return the bucket index for {@code h}.
     */
    private int bucket(int h) {
        // Math.abs(Integer.MIN_VALUE) is still negative, so that single value
        // is mapped separately. The formula is kept unchanged so that bucket
        // assignments stay stable.
        if (h == Integer.MIN_VALUE) {
            return (Integer.MAX_VALUE - (numFeatures - 1)) % numFeatures;
        }
        return Math.abs(h) % numFeatures;
    }
}

