package fr.ifremer.coselmar.services.indexation;

/*
 * #%L
 * Coselmar :: Rest Services
 * $Id:$
 * $HeadURL:$
 * %%
 * Copyright (C) 2014 - 2015 Ifremer, Code Lutin
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/gpl-3.0.html>.
 * #L%
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import fr.ifremer.coselmar.beans.DocumentBean;
import fr.ifremer.coselmar.services.CoselmarSimpleServiceSupport;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;

/**
 * This service provides operations for indexing {@link fr.ifremer.coselmar.persistence.entity.Document}
 * (or, more exactly, {@link fr.ifremer.coselmar.beans.DocumentBean}):
 * <ul>
 * <li>registration of a document in the indexation db</li>
 * <li>modification of a document in the indexation db</li>
 * <li>documents search from the indexation db</li>
 * </ul>
 *
 * The purpose is to use the power of an indexation db (Lucene) to improve searches on
 * document text fields and to make full-text search easier.
 *
 * @author ymartel <martel@codelutin.com>
 */
public class DocumentsIndexationService extends CoselmarSimpleServiceSupport {

    protected static final String DOCUMENT_ID_INDEX_PROPERTY = "documentId";
    protected static final String DOCUMENT_NAME_INDEX_PROPERTY = "documentName";
    protected static final String DOCUMENT_AUTHORS_INDEX_PROPERTY = "documentAuthors";
    protected static final String DOCUMENT_SUMMARY_INDEX_PROPERTY = "documentSummary";
    protected static final String DOCUMENT_KEYWORD_INDEX_PROPERTY = "documentKeyword";
    protected static final String DOCUMENT_TYPE = "document";

    public void indexDocument(DocumentBean document) throws IOException {

        Document doc = new Document();
        doc.add(new StringField(DOCUMENT_ID_INDEX_PROPERTY, document.getId(), Field.Store.YES));

        doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, document.getName(), Field.Store.YES));
        doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES));
        doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, document.getSummary(), Field.Store.YES));
        doc.add(new Field("type", DOCUMENT_TYPE, TextField.TYPE_STORED));

        Set<String> keywords = document.getKeywords();
        for (String keyword : keywords) {
            doc.add(new Field(DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword, TextField.TYPE_STORED));
        }

        getLuceneUtils().getIndexWriter().addDocument(doc);
        getLuceneUtils().getIndexWriter().commit();

    }

    public List<String> searchDocuments(String text) throws IOException, ParseException {
        DirectoryReader ireader = DirectoryReader.open(getLuceneUtils().getIndexWriter(), false);
        IndexSearcher isearcher = new IndexSearcher(ireader);

        String[] words = text.split(" ");

        // Parse a simple query that searches for the "text":
        BooleanQuery query = new BooleanQuery();

        PhraseQuery nameQuery = new PhraseQuery();
        PhraseQuery summaryQuery = new PhraseQuery();
        PhraseQuery authorsQuery = new PhraseQuery();

        for (String word : words) {
            nameQuery.add(new Term(DOCUMENT_NAME_INDEX_PROPERTY, word.toLowerCase()));
            summaryQuery.add(new Term(DOCUMENT_SUMMARY_INDEX_PROPERTY, word.toLowerCase()));
            authorsQuery.add(new Term(DOCUMENT_AUTHORS_INDEX_PROPERTY, word.toLowerCase()));
        }

        query.add(nameQuery, BooleanClause.Occur.SHOULD);
        query.add(summaryQuery, BooleanClause.Occur.SHOULD);
        query.add(authorsQuery, BooleanClause.Occur.SHOULD);

        query.add(new TermQuery(new Term(DOCUMENT_KEYWORD_INDEX_PROPERTY, text)), BooleanClause.Occur.SHOULD);


        // Combine that with the type
        BooleanQuery fullQuery = new BooleanQuery();
        fullQuery.add(query, BooleanClause.Occur.MUST);
        fullQuery.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST);

        ScoreDoc[] hits = isearcher.search(fullQuery, null, 1000).scoreDocs;

        List<String> documentIds = new ArrayList(hits.length);

        for (ScoreDoc hit : hits) {
            Document doc = isearcher.doc(hit.doc);
            String documentId = doc.get(DOCUMENT_ID_INDEX_PROPERTY);
            documentIds.add(documentId);
        }

        ireader.close();
        return documentIds;
    }

    public List<String> searchDocuments(List<String> texts) throws IOException, ParseException {
        DirectoryReader ireader = DirectoryReader.open(getLuceneUtils().getIndexWriter(), false);
        IndexSearcher isearcher = new IndexSearcher(ireader);


        BooleanQuery keywordsQuery = new BooleanQuery();

        for (String text : texts) {
            String[] words = text.split(" ");

            // Parse a simple query that searches for the "text":
            BooleanQuery query = new BooleanQuery();

            PhraseQuery nameQuery = new PhraseQuery();
            PhraseQuery summaryQuery = new PhraseQuery();
            PhraseQuery authorsQuery = new PhraseQuery();

            for (String word : words) {
                nameQuery.add(new Term(DOCUMENT_NAME_INDEX_PROPERTY, word.toLowerCase()));
                summaryQuery.add(new Term(DOCUMENT_SUMMARY_INDEX_PROPERTY, word.toLowerCase()));
                authorsQuery.add(new Term(DOCUMENT_AUTHORS_INDEX_PROPERTY, word.toLowerCase()));
            }

            query.add(nameQuery, BooleanClause.Occur.SHOULD);
            query.add(summaryQuery, BooleanClause.Occur.SHOULD);
            query.add(authorsQuery, BooleanClause.Occur.SHOULD);

            query.add(new TermQuery(new Term(DOCUMENT_KEYWORD_INDEX_PROPERTY, text)), BooleanClause.Occur.SHOULD);


            // Combine that with the type
            //XXX ymartel : put to Occur.SHOULD to make an "OR"
            keywordsQuery.add(query, BooleanClause.Occur.MUST);
        }

        BooleanQuery fullQuery = new BooleanQuery();
        fullQuery.add(keywordsQuery, BooleanClause.Occur.MUST);
        fullQuery.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST);

        ScoreDoc[] hits = isearcher.search(fullQuery, null, 1000).scoreDocs;

        List<String> documentIds = new ArrayList(hits.length);

        for (ScoreDoc hit : hits) {
            Document doc = isearcher.doc(hit.doc);
            String documentId = doc.get(DOCUMENT_ID_INDEX_PROPERTY);
            documentIds.add(documentId);
        }

        ireader.close();
        return documentIds;
    }

    public void updateDocument(DocumentBean document) throws IOException {
        DirectoryReader ireader = DirectoryReader.open(getLuceneUtils().getIndexWriter(), false);
        IndexSearcher isearcher = new IndexSearcher(ireader);

        // Retrieve document
        BooleanQuery query = new BooleanQuery();
        query.add(new TermQuery(new Term(DOCUMENT_ID_INDEX_PROPERTY, document.getId())), BooleanClause.Occur.MUST);
        query.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST);

        ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
        if (hits.length > 0) {
            Document doc = isearcher.doc(hits[0].doc);

            doc.removeField(DOCUMENT_NAME_INDEX_PROPERTY);
            doc.add(new TextField(DOCUMENT_NAME_INDEX_PROPERTY, document.getName(), Field.Store.YES));
            doc.removeField(DOCUMENT_AUTHORS_INDEX_PROPERTY);
            doc.add(new TextField(DOCUMENT_AUTHORS_INDEX_PROPERTY, document.getAuthors(), Field.Store.YES));
            doc.removeField(DOCUMENT_SUMMARY_INDEX_PROPERTY);
            doc.add(new TextField(DOCUMENT_SUMMARY_INDEX_PROPERTY, document.getSummary(), Field.Store.YES));

            doc.removeFields(DOCUMENT_KEYWORD_INDEX_PROPERTY);
            Set<String> keywords = document.getKeywords();
            for (String keyword : keywords) {
                doc.add(new Field(DOCUMENT_KEYWORD_INDEX_PROPERTY, keyword, TextField.TYPE_STORED));
            }

            getLuceneUtils().getIndexWriter().updateDocument(new Term(DOCUMENT_ID_INDEX_PROPERTY, document.getId()), doc);
            getLuceneUtils().getIndexWriter().commit();
        }

        ireader.close();
    }

    public void deleteDocument(String documentId) throws IOException {

        // Retrieve document
        BooleanQuery query = new BooleanQuery();
        query.add(new TermQuery(new Term(DOCUMENT_ID_INDEX_PROPERTY, documentId)), BooleanClause.Occur.MUST);
        query.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST);

        getLuceneUtils().getIndexWriter().deleteDocuments(query);
        getLuceneUtils().getIndexWriter().commit();

    }

    protected void cleanIndex() throws IOException {
        BooleanQuery query = new BooleanQuery();
        query.add(new TermQuery(new Term("type", DOCUMENT_TYPE)), BooleanClause.Occur.MUST);
        getLuceneUtils().getIndexWriter().deleteDocuments(query);
        getLuceneUtils().getIndexWriter().commit();
    }


}
