#include "cl_config.h"
#include "defaults.h"
#include "message.h"
#include "phrase.h"
#include "strdup.h"
#include "wchar_utils.h"

#include <libxml/tree.h>
#include <libxml/xpath.h>
#include <libxslt/xsltInternals.h>
#include <libxslt/transform.h>
#include <popt.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef TRISH2
#  include <patricia_search.h>
#  include <trish_dict.h>
#else
#  include <dictionary.h>
#  include <patricia_node.h>
#endif

#include "pedantic_macros.h"
#include "mouchard.h"

/*
 * Logging thresholds, initialised from the compile-time defaults.
 * They have external linkage; presumably message() and the verbosity
 * option table read/write them -- TODO confirm against message.h.
 */
int verbosity = DEFAULT_VERBOSITY;
int severity = DEFAULT_SEVERITY;

/* String handed to phrase_extension() when a phrase is extended by one more token. */
static char *separator = DEFAULT_SEPARATOR;
/* Non-zero: each input file is first run through the ALVIS_TO_BIOTERMTAGGER_XSLT stylesheet. */
static int alvis = 0;
/* Non-zero: dictionary lookups are meant to use the phrase lemma instead of its form. */
static int lemma = 0;
/* Upper bound on the number of consecutive tokens combined into one candidate phrase. */
static int max_length = DEFAULT_MAXLENGTH;
/* Passed as the last argument of psearch_new(); meaning is defined by the search library. */
static int density = DEFAULT_DENSITY;

/*
 * popt option table for the command line.
 *
 * Besides the automatic help entry and the included verbosity sub-table
 * (verb_opts), each option stores its value directly into one of the
 * file-scope variables above; every entry of this table uses val == 0.
 * Long/short names and help texts come from the CLO_* macros.
 */
struct poptOption cl_options[] = {
  POPT_AUTOHELP
  { NULL , '\0' , POPT_ARG_INCLUDE_TABLE     , verb_opts , 0 , "Verbosity options:" , NULL },
  { CLO_LONG_SEPARATOR , CLO_SHORT_SEPARATOR , POPT_ARG_STRING , &separator  , 0               , CLO_HELP_SEPARATOR , "SEP" },
  { CLO_LONG_ALVIS     , CLO_SHORT_ALVIS     , POPT_ARG_NONE   , &alvis      , 0               , CLO_HELP_ALVIS     , NULL },
  { CLO_LONG_MAXLENGTH , CLO_SHORT_MAXLENGTH , POPT_ARG_INT    , &max_length , 0               , CLO_HELP_MAXLENGTH , "LENGTH" },
  { CLO_LONG_DENSITY   , CLO_SHORT_DENSITY   , POPT_ARG_INT    , &density    , 0               , CLO_HELP_DENSITY   , "DENSITY" },
  { CLO_LONG_LEMMA     , CLO_SHORT_LEMMA     , POPT_ARG_NONE   , &lemma      , 0               , CLO_HELP_LEMMA     , NULL },
  POPT_TABLEEND
};

int main(int argc, const char **argv) {
  int opth;
  int i, j;
  FILE *f;
  const char *in_fn, *dict_fn;
  tdict_t *dict;
  phrase_t *current_ph = NULL;
  phrase_t *last_ph = NULL;
  phrase_t **terms = NULL;
  int n_terms = 0;
  /*int tok_len;*/
  int last_i = 0;
  int last_j = 0;
  xmlDoc *in_doc;
  xmlXPathContext *xp_ctx;
  xmlXPathObject *xp_obj;
  poptContext context = poptGetContext(NULL, argc, argv, cl_options, 0);

  poptSetOtherOptionHelp(context, "DICT FILE [FILES]");

  while ((opth = poptGetNextOpt(context)) > 0) {
    switch (opth) {
    SWITCH_VERBOSITY
    }
  }
  if (opth != -1) {
    message(VERBOSITY_QUIET | SEVERITY_FATAL, "%s %s", poptStrerror(opth), poptBadOption(context, 0));
  }

  if (poptPeekArg(context)) {
    dict_fn =  poptGetArg(context);
    if ((f = fopen(dict_fn, "r"))) {
      dict = tdict_load(f);
      fclose(f);
    }
    else {
      message(VERBOSITY_QUIET | SEVERITY_FATAL, "Could not load dictionary '%s'", dict_fn);
    }
  }
  else {
    message(VERBOSITY_QUIET | SEVERITY_FATAL, "You must provide a dictionary file name");
  }

  if (poptPeekArg(context)) {
    while ((in_fn = poptGetArg(context))) {
      xmlDoc *pre_doc = xmlReadFile(in_fn, NULL, 0);

      if (pre_doc == NULL) {
	message(VERBOSITY_QUIET | SEVERITY_ERROR, "Could not open file '%s'", in_fn);
      }
      
      if (alvis)  {
	int old_sub = xmlSubstituteEntitiesDefault(1);
	int old_led = xmlLoadExtDtdDefaultValue;
	xsltStylesheet *ss = xsltParseStylesheetFile(ALVIS_TO_BIOTERMTAGGER_XSLT);
	
	if (ss == NULL) {
	  message(VERBOSITY_QUIET | SEVERITY_FATAL, "Could not open file '" ALVIS_TO_BIOTERMTAGGER_XSLT "'");
	}

	xmlLoadExtDtdDefaultValue = 1;
	in_doc = xsltApplyStylesheet(ss, pre_doc, NULL);
	xsltFreeStylesheet(ss);
	xmlFreeDoc(pre_doc);
	xsltCleanupGlobals();
	xmlSubstituteEntitiesDefault(old_sub);
	xmlLoadExtDtdDefaultValue = old_led;
      }
      else {
	in_doc = pre_doc;
      }

      /**/
      if (in_doc != NULL) {
	psearch_t *search = psearch_new(dict->map, PSEARCH_OPTION_FULL | PSEARCH_OPTION_CI_ALL, NULL, density);
	int nt;

	xp_ctx = xmlXPathNewContext(in_doc);
	xp_obj = xmlXPathEvalExpression("/BioTermTagger/token-list/t", xp_ctx);
	last_ph = NULL;
	current_ph = NULL;
	last_i = 0;
	last_j = 0;
	nt = (xp_obj->nodesetval == NULL) ? 0 : xp_obj->nodesetval->nodeNr;

	for (i = 0; i < xp_obj->nodesetval->nodeNr; i++) {
	  for (j = 1; (j <= max_length) && (i + j - 1 < xp_obj->nodesetval->nodeNr); j++) {
	    int k = i + j - 1;
	    char *w = (char *)xp_obj->nodesetval->nodeTab[k]->children->content;
	    char *id = (char *)xmlHasProp(xp_obj->nodesetval->nodeTab[k], "id")->children->content;
	    char *lemma = (char *)xmlHasProp(xp_obj->nodesetval->nodeTab[k], "lemma")->children->content;

	    if (j == 1) {
	      phrase_free(current_ph);
	      current_ph = phrase_new(w, lemma, id);
	    }
	    else {
	      phrase_extension(current_ph, w, lemma, separator, id);
	    }
	    
	    if (last_ph && (i >= last_i + last_j)) {
	      message(VERBOSITY_LOCACE | SEVERITY_INFO, "Getting out of the last term range, registering '%s'", last_ph->form);
	      n_terms++;
	      terms = (n_terms == 1) ? calloc(1, sizeof(phrase_t *)) : realloc(terms, sizeof(phrase_t *) * n_terms);
	      terms[n_terms - 1] = last_ph;
	      last_ph = NULL;
	    }
	    
	    if ((i >= last_i + last_j) || (j > last_j)) {
	      wchar_t *w = char2wcs(lemma ? current_ph->lemma : current_ph->form);
	      psearch_reinit(search);
	      psearch_new_query(search, w);
	      patricia_search(search);
	      message(VERBOSITY_VERBOSE | SEVERITY_INFO, "Searching for '%s': %s", lemma ? current_ph->lemma : current_ph->form, search->final ? "yes" : "no");
	      if (search->final) {
		phrase_free(last_ph);
		last_ph = phrase_copy(current_ph);
		last_ph->label = wcs2char(((wchar_t **)search->final->node->data)[0]);
		last_i = i;
		last_j = j;
	      }
	    }
	  }
	}

	psearch_free(search);
	xmlXPathFreeObject(xp_obj);
	xmlXPathFreeContext(xp_ctx);
	xmlFreeDoc(in_doc);
	xmlCleanupParser();
      }
      /**/
    }
  }
  else {
    message(VERBOSITY_QUIET | SEVERITY_FATAL, "You must provide a file name");
  }

  poptFreeContext(context);

  message(VERBOSITY_NORMAL | SEVERITY_INFO, "Form\tLemma\tCanonical\tId List");
  for (i = 0; i < n_terms; i++) {
    printf("%s\t%s\t%s", terms[i]->form, terms[i]->lemma, terms[i]->label);
    for (j = 1; j < terms[i]->n_tokens; j++) {
      printf("\t%s", terms[i]->tokens[j]);
    }
    printf("\n");
  }

  tdict_free(dict);

  return 0;
}
