#include "cl_config.h"
#include "defaults.h"
#include "verbosity.h"

#include <trish_dict.h>
#include <patricia_search.h>

#include <libxml/tree.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>

#include <popt.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


#include "mouchard.h"

/*
 * Convert a multibyte string (in the current locale's encoding) into a
 * newly allocated, NUL-terminated wide-character string.
 *
 * Returns NULL when s contains an invalid multibyte sequence or when the
 * allocation fails.  The caller owns the result and releases it with free().
 */
wchar_t *char2wcs(const char *s) {
  size_t n = mbstowcs(NULL, s, 0);
  wchar_t *w;

  /* Test the error value before making room for the terminator:
     (size_t)-1 + 1 wraps around to 0 and would hide the failure. */
  if (n == (size_t)-1)
    return NULL;

  w = calloc(n + 1, sizeof *w);
  if (w == NULL)
    return NULL;

  /* Only n characters are converted; calloc already zeroed the terminator. */
  mbstowcs(w, s, n);

  return w;
}

/*
 * Convert a wide-character string into a newly allocated, NUL-terminated
 * multibyte string in the current locale's encoding.
 *
 * The buffer is sized from the length reported by wcstombs(), so strings of
 * any length are converted in full.  Returns NULL when w contains a
 * character that cannot be represented or when the allocation fails.  The
 * caller owns the result and releases it with free().
 */
char *wcs2char(const wchar_t *w) {
  size_t n = wcstombs(NULL, w, 0);
  char *s;

  if (n == (size_t)-1)
    return NULL;

  s = calloc(n + 1, sizeof *s);
  if (s == NULL)
    return NULL;

  /* n + 1 leaves room for the terminating NUL written by wcstombs(). */
  wcstombs(s, w, n + 1);
  return s;
}

/* Command-line settings; popt stores the parsed values here through the
   pointers registered in cl_options below. */
static int stdo = 0;                   /* non-zero: dump each result on stdout */
static char *suffix = DEFAULT_SUFFIX;  /* appended to the input base name */
static char *parent_fn = NULL;         /* dictionary searched for the class */
static char *canon_fn = NULL;          /* dictionary searched for canonical forms */

/* Verbosity sub-table, pulled into the main table via POPT_ARG_INCLUDE_TABLE. */
static struct poptOption cl_verbosity_options[] = CLX_VERBOSITY_OPTIONS;

/* Main option table handed to poptGetContext() in main(). */
struct poptOption cl_options[] = {
  POPT_AUTOHELP
  { CLOL_CANON,  CLOS_CANON,  POPT_ARG_STRING, &canon_fn,  0, CLOH_CANON,  "DICT" },
  { CLOL_PARENT, CLOS_PARENT, POPT_ARG_STRING, &parent_fn, 0, CLOH_PARENT, "DICT" },
  { CLOL_SUFFIX, CLOS_SUFFIX, POPT_ARG_STRING, &suffix,    0, CLOH_SUFFIX, "SUFF" },
  { CLOL_STDOUT, CLOS_STDOUT, POPT_ARG_NONE,   &stdo,      0, CLOH_STDOUT, NULL },
  { NULL, '\0', POPT_ARG_INCLUDE_TABLE, cl_verbosity_options, 0, "Verbosity options:", NULL },
  POPT_TABLEEND
};

int main(int argc, const char **argv) {
  int opth;
  poptContext context = poptGetContext(NULL, argc, argv, cl_options, 0);

  xmlDoc *onto_doc = NULL;
  tdict_t *canon_dict = NULL;
  tdict_t *parent_dict = NULL;
  psearch_t *search_canon = NULL;
  psearch_t *search_parent = NULL;
  const char *in_fn;
  xmlXPathContext *xpc_onto;

  poptSetOtherOptionHelp(context, "ONTOFILE FILE [FILES]");

  while ((opth = poptGetNextOpt(context)) > 0) {
    switch (opth) {
    case CLOV_QUIET:
      verbosity = V_QUIET;
      break;
    case CLOV_VERBOSE:
      verbosity = V_VERBOSE;
      break;
    case CLOV_LOCACE:
      verbosity = V_LOCACE;
      break;
    case CLOV_STRICT:
      severity = V_WARNING;
      break;
    }
  }
  if (opth != -1) {
    verbosity_message(V_QUIET | V_ERROR, "%s %s", poptStrerror(opth), poptBadOption(context, 0));
  }

  if (poptPeekArg(context)) {
    const char *onto_fn =  poptGetArg(context);
    onto_doc = xmlReadFile(onto_fn, NULL, 0);
    if (onto_doc == NULL) {
      verbosity_message(V_QUIET | V_ERROR, "Could not open file '%s'", onto_fn);
    }
    xpc_onto = xmlXPathNewContext(onto_doc);
    xmlXPathRegisterNs(xpc_onto, BAD_CAST "rdf", BAD_CAST RDF_NS);
    xmlXPathRegisterNs(xpc_onto, BAD_CAST "xsd", BAD_CAST XSD_NS);
    xmlXPathRegisterNs(xpc_onto, BAD_CAST "onto", BAD_CAST ONTO_NS);
  }
  else {
    verbosity_message(V_QUIET | V_ERROR, "You must provide an ontology file name");
  }

  if (canon_fn != NULL) {
    FILE *f = fopen(canon_fn, "r");
    if (f == NULL) {
      verbosity_message(V_QUIET | V_ERROR, "Could not open file '%s'", canon_fn);
    }
    canon_dict = tdict_load(f);
    fclose(f);
    search_canon = psearch_new(canon_dict->map, PSEARCH_OPTION_FULL, NULL, 1);
  }

  if (parent_fn != NULL) {
    FILE *f = fopen(parent_fn, "r");
    if (f == NULL) {
      verbosity_message(V_QUIET | V_ERROR, "Could not open file '%s'", parent_fn);
    }
    parent_dict = tdict_load(f);
    fclose(f);
    search_parent = psearch_new(parent_dict->multimap->pat, PSEARCH_OPTION_FULL | PSEARCH_OPTION_CI_ALL, NULL, 1);
  }

  while ((in_fn = poptGetArg(context))) {
    int i;
    xmlDoc *doc = xmlReadFile(in_fn, NULL, 0);
    xmlXPathContext *xpc_doc;
    xmlXPathObject *xpo_la;
    xmlNode *la_node = NULL;
    xmlNode *sfl_node = NULL;
    int sfid = 0;
    char *out_fn = NULL;
    char *in_dot = NULL;
    FILE *f;

    verbosity_message(V_VERBOSE | V_INFO, "Processing file '%s'", in_fn);

    if (doc == NULL) {
      verbosity_message(V_QUIET | V_WARNING, "Could not open file '%s'", in_fn);
    }

    xpc_doc = xmlXPathNewContext(doc);
    xmlXPathRegisterNs(xpc_doc, BAD_CAST "a", BAD_CAST ALVIS_NS);

    xpo_la = xmlXPathEvalExpression(BAD_CAST "/a:documentCollection/a:documentRecord/a:linguisticAnalysis", xpc_doc);
    if (xpo_la && xpo_la->nodesetval) {

      xmlXPathObject *xpo_su = xmlXPathEvalExpression(BAD_CAST "/a:documentCollection/a:documentRecord/a:linguisticAnalysis/a:semantic_unit_level/a:semantic_unit/a:*", xpc_doc);
      int su_number = ((xpo_su == NULL) || (xpo_su->nodesetval == NULL)) ? 0 : xpo_su->nodesetval->nodeNr;


      la_node = xpo_la->nodesetval->nodeTab[0];

      verbosity_message(V_VERBOSE | V_INFO, "  Linguistic Analysis found, %d semantic units", su_number);

      for (i = 0; i < su_number; i++) {
	xmlNode *form_node = NULL;
	xmlNode *cform_node = NULL;
	char *form = NULL;
	char *cform = NULL;
	xmlNode *n;
	char *suid = NULL;
	char *class = NULL;

	/* Get SU characteristics: id, form and canonical form */
	for (n = xpo_su->nodesetval->nodeTab[i]->children; n != NULL; n = n->next) {
	  if (!strcmp((char *)n->name, "id")) {
	    suid = (char *)n->children->content;
	  }
	  if (!strcmp((char *)n->name, "form")) {
	    form_node = n;
	    if (n->children != NULL)
	      form = (char *)n->children->content;
	  }
	  if (!strcmp((char *)n->name, "canonical_form")) {
	    cform_node = n;
	    if (n->children != NULL)
	      cform = (char *)n->children->content;
	  }
	}

	/* No form for SU: out */
	if ((form_node == NULL) || (form == NULL))
	  continue;

	/* Look for canonical form if there isn't any and if the canon dictionary is provided */
	if ((cform == NULL) && (search_canon != NULL)) {
	  wchar_t *w = char2wcs(form);
	  psearch_reinit(search_canon);
	  psearch_new_query(search_canon, w);
	  patricia_search(search_canon);
	  if (search_canon->final != NULL) {
	    cform = wcs2char(((wchar_t **)(search_canon->final->node->data))[0]);
	    if (cform_node == NULL) {
	      cform_node = xmlNewDocNode(doc, NULL, BAD_CAST "canonical_form", BAD_CAST cform);
	      xmlAddPrevSibling(form_node, cform_node);
	    }
	    else {
	      xmlAddChild(cform_node, xmlNewDocText(doc, BAD_CAST cform));
	    }
	  }
	  free(w);
	}

	/* No canonical form: same as form */
	if ((cform_node == NULL) || (cform == NULL)) {
	  cform_node = xmlNewDocNode(doc, NULL, BAD_CAST "canonical_form", BAD_CAST form);
	  xmlAddPrevSibling(form_node, cform_node);
	  cform = form;
	}

	MOUCH_STR(form);
	MOUCH_STR(cform);
	/* Look for the class in the parent dictionary */
	if (search_parent != NULL) {
	  wchar_t *w = char2wcs(cform);
	  psearch_reinit(search_parent);
	  psearch_new_query(search_parent, w);
	  patricia_search(search_parent);
	  if (search_parent->final != NULL) {
	    pmap_multival_t *mval = search_parent->final->node->data;
	    class = wcs2char(((wchar_t **)mval->value)[0]); /* XXX */
	  }
	  free(w);
	}

	/* Look for the class in the ontology */
	if (class == NULL) {
	  xmlXPathObject *xpo_onto = NULL;
	  char *q1 = calloc(strlen(Q1) + strlen(cform), sizeof(char));
	  sprintf(q1, Q1, cform);
	  xpo_onto = xmlXPathEvalExpression(BAD_CAST q1, xpc_onto);
	  if (xpo_onto && xpo_onto->nodesetval && xpo_onto->nodesetval->nodeTab) {
	    char *id;
	    char *q2;
	    id = (char *)xmlHasProp(xpo_onto->nodesetval->nodeTab[0], BAD_CAST "ID")->children->content;
	    q2 = calloc(strlen(Q2) + strlen(id), sizeof(char));
	    sprintf(q2, Q2, id);
	    xpo_onto = xmlXPathEvalExpression(BAD_CAST q2, xpc_onto);
	    if (xpo_onto && xpo_onto->nodesetval) {
	      class = (char *)xmlHasProp(xpo_onto->nodesetval->nodeTab[0], BAD_CAST "ID")->children->content;
	    }
	  }
	  free(q1);
	  xmlXPathFreeObject(xpo_onto);
	}

	/* No class: out */
	if (class == NULL)
	  continue;

	/* If class is something worthy */
	if (strcmp(class, "")) {
	  xmlNode *sf_node = xmlNewDocNode(doc, NULL, BAD_CAST "semantic_features", NULL);
	  xmlNode *semantic_cat = xmlNewDocNode(doc, NULL, BAD_CAST "semantic_category", NULL);
	  xmlNode *node_list = xmlNewDocNode(doc, NULL, BAD_CAST "list_refid_ontology_node", NULL);
	  xmlNode *class_node = xmlNewDocNode(doc, NULL, BAD_CAST "refid_ontology_node", BAD_CAST class);
	  xmlNode *su_ref = xmlNewDocNode(doc, NULL, BAD_CAST "refid_semantic_unit", BAD_CAST suid);
	  xmlNode *sfid_node = NULL;
	  char *sfid_full = calloc(strlen(SFID_FORMAT) + 5, sizeof(char));
	  
	  sprintf(sfid_full, SFID_FORMAT, sfid);
	  sfid++;
	  sfid_node = xmlNewDocNode(doc, NULL, BAD_CAST "id", BAD_CAST sfid_full);
	  
	  xmlAddChild(sf_node, sfid_node);
	  xmlAddChild(node_list, class_node);
	  xmlAddChild(semantic_cat, node_list);
	  xmlAddChild(sf_node, semantic_cat);
	  xmlAddChild(sf_node, su_ref);
	  
	  if (sfl_node == NULL) {
	    sfl_node = xmlNewDocNode(doc, NULL, BAD_CAST "semantic_features_level", NULL);
	    xmlAddChild(la_node, sfl_node);
	  }
	  
	  xmlAddChild(sfl_node, sf_node);
	  free(sfid_full);
	}
	free(class);
      }

      xmlXPathFreeObject(xpo_su);
    }

    if (stdo) {
      xmlDocDump(stdout, doc);
    }
    else {
      if ((in_dot = strrchr(in_fn, '.'))) {
	*in_dot = '\0';
      }
      out_fn = calloc(strlen(in_fn) + strlen(suffix) + 1, sizeof(char));
      strcat(out_fn, in_fn);
      strcat(out_fn, suffix);
      f = fopen(out_fn, "w");
      if (f == NULL) {
	verbosity_message(V_QUIET | V_WARNING, "Could not open '%s' for writing", out_fn);
      }
      else {
	xmlDocDump(f, doc);
	fclose(f);
      }
    }
    free(out_fn);

    xmlXPathFreeObject(xpo_la);
    xmlXPathFreeContext(xpc_doc);
    xmlFreeDoc(doc);
  }

  poptFreeContext(context);
  xmlXPathFreeContext(xpc_onto);
  xmlFreeDoc(onto_doc);
  xmlCleanupParser();

  if (search_canon != NULL)
    psearch_free(search_canon);
  if (search_parent != NULL)
    psearch_free(search_parent);
  if (canon_dict != NULL)
    tdict_free(canon_dict);
  if (parent_dict != NULL)
    tdict_free(parent_dict);

  return 0;
}
