package org.planx.msd.character;

import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.RandomAccess;
import org.planx.msd.*;
import org.planx.msd.number.IntExtractor;
import org.planx.msd.number.IntegerDiscriminator;
import org.planx.msd.util.*;
import org.planx.util.Array;

/**
 * A <code>Discriminator</code> capable of discriminating a multiset of
 * <code>CharSequence</code> objects (e.g. <code>String</code>s).
 * This implementation performs an initial partitioning on the lengths of
 * the <code>CharSequence</code>s. Unless the sizes of the
 * <code>CharSequence</code>s differ very much, this implementation will be
 * slower than the standard <code>CharSequenceDiscriminator</code>.
 * <p>
 * <b>Note that this implementation is not synchronized.</b> If multiple
 * threads access an instance of this class concurrently, it must be
 * synchronized externally.
 *
 * @author Thomas Ambus
 */
public class SizeCharSequenceDiscriminator<T extends CharSequence>
                            extends AbstractDiscriminator<T> {
    /** Initial capacity of the stack of unfinished blocks. */
    private static final int INIT_CAPACITY = 10;
    /** Upper bound on the initial capacity of each per-character sub-block. */
    private static final int INIT_SUB_CAPACITY = 10;

    private final Memory memory;
    private final IntegerDiscriminator intDisc;

    /**
     * Constructs a new <code>SizeCharSequenceDiscriminator</code> reusing the
     * memory allocated in the specified <code>Memory</code>. The same
     * <code>Memory</code> is also handed to the internal
     * <code>IntegerDiscriminator</code> used for the initial partitioning
     * on length.
     *
     * @param memory the <code>Memory</code> whose <code>dictionary</code> and
     *               <code>used</code> arrays are used as scratch space
     */
    public SizeCharSequenceDiscriminator(Memory memory) {
        this.memory = memory;
        intDisc = new IntegerDiscriminator(memory);
    }

    /**
     * Partitions <code>values</code> into groups whose labels, as returned by
     * <code>e.getLabel</code>, are equal character sequences.
     * <p>
     * Lists of zero, one or two elements are handled directly. Otherwise the
     * elements are first partitioned on label length using the internal
     * <code>IntegerDiscriminator</code>, and each block of equal-length
     * labels is then refined one character position at a time until every
     * block either holds a single element or has been examined up to the
     * end of its labels.
     *
     * @param values the elements to discriminate; if the list does not
     *               implement <code>RandomAccess</code> it is copied into an
     *               <code>ArrayList</code> first
     * @param e      the extractor supplying the label and the value of each
     *               element
     * @return a collection of groups; in the fast paths each group holds the
     *         <code>e.getValue</code> results directly, otherwise each group
     *         is produced by <code>Discriminators.valueList</code>
     */
    public <U,S> Collection<List<S>> discriminate(List<? extends U> values,
                                        final Extractor<U,? extends T,S> e) {

        // Check fast way out: only zero, one, or two elements

        int vsize = values.size();
        switch (vsize) {
        case 0:
            return Collections.emptyList();
        case 1:
            List<S> l = Collections.singletonList(e.getValue(values.get(0)));
            return Collections.singletonList(l);
        case 2:
            U u1 = values.get(0);
            U u2 = values.get(1);
            CharSequence c1 = e.getLabel(u1);
            CharSequence c2 = e.getLabel(u2);
            if (Discriminators.equals(c1,c2)) {
                List<S> l1 = new ArrayList<S>(2);
                l1.add(e.getValue(u1));
                l1.add(e.getValue(u2));
                return Collections.singletonList(l1);
            } else {
                Collection<List<S>> r = new ArrayList<List<S>>(2);
                List<S> l1 = Collections.singletonList(e.getValue(u1));
                List<S> l2 = Collections.singletonList(e.getValue(u2));
                r.add(l1);
                r.add(l2);
                return r;
            }
        default:
            // Three or more elements: continue with the general algorithm
            break;
        }

        // Blocks are accessed by index below, so make sure that is cheap
        if (!(values instanceof RandomAccess)) {
            values = new ArrayList<U>(values);
        }

        // Declarations

        // Scratch arrays shared through Memory: dictionary is indexed by
        // character code, used records which dictionary slots were filled
        List[] dictionary = memory.dictionary;
        int[] used = memory.used;

        Collection<List<S>> result = new ArrayList<List<S>>();

        // work/indexes form a stack of unfinished blocks: work[i] is a block
        // and indexes[i] the character position at which to refine it next
        int[] indexes = new int[INIT_CAPACITY]; // Note: Initialized to 0
        List[] work = new List[INIT_CAPACITY];
        int workCapacity = INIT_CAPACITY;
        int workHead = 0;

        // Perform initial partitioning based on list size

        IntExtractor<U,U> intExt = new IntExtractor<U,U>() {
            public int getLabel(U elm) {
                return e.getLabel(elm).length();
            }
            public U getValue(U elm) {
                return elm;
            }
        };
        Collection<List<U>> partition =
            intDisc.discriminate(values, intExt);

        // Add all size-equivalent blocks to unfinished work

        for (List<U> block : partition) {
            // If the block only contains one element it cannot
            // be partitioned any further
            if (block.size() > 1) {
                if (workHead+1 >= workCapacity) {
                    // Grow both stacks together so they stay the same length
                    work = Array.ensureCapacity(
                        work, workHead, workCapacity+1);
                    indexes = Array.ensureCapacity(
                        indexes, workHead, workCapacity+1);
                    workCapacity = work.length;
                }
                // indexes[workHead] is left untouched here; no slot has been
                // given a non-zero index yet, so refinement starts at 0
                work[workHead] = block;
                workHead++;
            } else {
                result.add(Discriminators.valueList(block, e));
            }
        }

        // Refine until there are no unfinished blocks

        while (workHead > 0) {
            // Drop the reference in the slot just above the top of the
            // stack (left over from an earlier pop) before popping
            if (workHead < workCapacity) {
                work[workHead] = null;
            }
            workHead--;

            // Subpartition current unfinished block
            // it is guaranteed that all charseqs in the block
            // have the same length

            List<U> block = work[workHead];
            int blockSize = block.size();
            int initSubSize = Math.min(blockSize, INIT_SUB_CAPACITY);
            int index = indexes[workHead];

            // The first element's length stands in for the whole block
            if (e.getLabel(block.get(0)).length() > index) {

                // End not reached, partition further on the character
                // at position index

                int usedSize = 0;
                for (int i=0; i<blockSize; i++) {
                    U elm = block.get(i);
                    CharSequence seq = e.getLabel(elm);
                    int c = (int) seq.charAt(index);

                    List<U> list = dictionary[c];
                    if (list == null) {
                        // First time this character is seen in the block
                        list = new ArrayList<U>(initSubSize);
                        dictionary[c] = list;
                        used[usedSize++] = c;
                    }
                    list.add(elm);
                }

                // Add blocks in partition to result or unfinished

                index++;
                for (int i=0; i<usedSize; i++) {
                    int idx = used[i];
                    List<U> subBlock = dictionary[idx];
                    // Leave the shared dictionary empty for the next pass
                    dictionary[idx] = null;

                    // If the block only contains one element it
                    // cannot be partitioned any further

                    if (subBlock.size() > 1) {
                        if (workHead+1 >= workCapacity) {
                            work = Array.ensureCapacity(
                                work, workHead, workCapacity+1);
                            indexes = Array.ensureCapacity(
                                indexes, workHead, workCapacity+1);
                            workCapacity = work.length;
                        }
                        work[workHead] = subBlock;
                        indexes[workHead] = index;
                        workHead++;
                    } else {
                        result.add(Discriminators.valueList(subBlock, e));
                    }
                }
            } else {
                // End of all charseqs in block reached, i.e.
                // they are equivalent
                result.add(Discriminators.valueList(block, e));
            }
        }
        return result;
    }
}
