/*
CSTLEMMA - trainable lemmatiser using word-end inflectional rules

Copyright (C) 2002, 2004  Center for Sprogteknologi, University of Copenhagen

This file is part of CSTLEMMA.

CSTLEMMA is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

CSTLEMMA is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with CSTLEMMA; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
#ifndef WORD_H
#define WORD_H

#include "defines.h"
#include "outputclass.h"
#include "basefrmpntr.h"
#include <string.h>
#include <stdio.h>

#if !QSORT

// Pointer-to-member-function types, only compiled when QSORT is not set.
// They are the argument types of the traverse0/traverse0T/traverse0C/traverse
// methods declared in unTaggedWord and taggedWord.
class taggedWord;
class unTaggedWord;
typedef void (unTaggedWord::*trav0)();          // non-const unTaggedWord member, no argument
typedef void (taggedWord::*trav0T)();           // non-const taggedWord member, no argument
typedef void (unTaggedWord::*trav0C)()const;    // const unTaggedWord member, no argument
typedef void (unTaggedWord::*trav)(void *);     // unTaggedWord member taking one opaque argument

#endif


// Forward declarations: this header only uses these types through pointers
// and references, so their full definitions are not needed here.
class basefrm; // see unTaggedWord::assignTo
struct lext;   // see maxFrequency, commonStem, commonType and addBaseFormsDL

/*
A word form together with its occurrence count and two optional sets of base
forms: those taken from the dictionary (pbfD) and those that were constructed
(pbfL).

The object owns a private copy of the word string and both base form pointers;
when QSORT is not set it also owns the objects reachable through 'left' and
'right'. All of these are released by the destructor.

NOTE(review): the class owns raw pointers but relies on the implicitly
generated copy constructor and assignment operator. Copying an instance would
lead to double deletion; callers presumably never copy -- confirm.
*/
class unTaggedWord : public OutputClass
    {
#if QSORT
    private:
        const unTaggedWord ** unsorted; // Slot that setUnsorted() writes through.
#else
    protected:
        // Links to other words; both are deleted by the destructor.
        unTaggedWord * left;
        unTaggedWord * right;
    public:
        static int reducedtotal; // Incremented by every constructor call.
        void traverse0(trav0);
        void traverse0C(trav0C);
        void traverse(trav,void *);
        /*
        virtual taggedWord * insert(const char * wrd,const char * tag){return NULL;};
        virtual void DissambiguateByTagFriends(){}
        */
#endif
    protected:
        bool hasAddedItselfToBaseForm:1; // Set by addFullForm() so that it acts only once.
        bool FoundInDict:1;              // Consulted by assignTo(); initialised to false.
        static FILE * fp;                // Output stream shared by all printing helpers.
        char * word;                     // Owned copy of the word, allocated with new[].
        baseformpointer * pbfD;  // dictionary's base forms
        baseformpointer * pbfL; // constructed base forms
        // If there is a constructed base form (lemma), then do not do 
        // statistics on the dictionary's lemmas.
        int cnt;                         // Starts at 1; see itsCnt() and addCnt().

        // Write a one-character indicator to fp:
        //   '+' constructed and dictionary base forms are both present,
        //   '-' only constructed base forms are present,
        //   ' ' no constructed base forms.
        void i() const
            {
            if(pbfL)
                {
                if(pbfD)
                    fputc('+',fp);
                else
                    fputc('-',fp);
                }
            else
                fputc(' ',fp);
            }
        // Write the count as a decimal number to fp.
        void f() const
            {
            fprintf(fp,"%d",cnt);
            }
        // Write the word itself to fp.
        void w() const
            {
            fprintf(fp,"%s",word);
            }
        // Write the dictionary's base forms (if any) to fp, using bfuncs and sep.
        void b() const
            {
            if(pbfD)
                pbfD->printfbf(fp,bfuncs,sep);
            }
        // Write the constructed base forms (if any) to fp, using Bfuncs and sep.
        void B() const
            {
            if(pbfL)
                pbfL->printfbf(fp,Bfuncs,sep);
            }
        static bool hasb;
        static bool hasB;
        // Plext/nmbr: the dictionary's available lexical information for this word.
        unsigned int maxFrequency(lext * Plext,int nmbr,const char * type,int & n);
        char * commonStem(lext * Plext,int nmbr,const char * type,unsigned int freq,unsigned int & offset);
        // Find the common type of the most frequent readings
        char * commonType(lext * Plext,int nmbr,unsigned int freq);
    public:
        static functionTree * Format(char * format);
        static functionTree * funcs;
        static bool setFormat(const char * cformat,const char * bformat,const char * Bformat,bool InputHasTags);
        // Always false in this class; virtual so that derived classes can override it.
        virtual bool skip()const
            {
            return false; 
            }
        static functionTree * bfuncs; // Passed to printfbf() by b().
        static functionTree * Bfuncs; // Passed to printfbf() by B().
        static function * getUnTaggedWordFunction(int character,bool & SortInput,int & testType);
        static function * getUnTaggedWordFunctionNoBb(int character,bool & SortInput,int & testType);
        static void setFile(FILE * fp);
        static const char * sep;      // Passed to printfbf() by b() and B().
        // Return the current count.
        int itsCnt()const{return cnt;}
#if QSORT
        // Store b in the slot that 'unsorted' points to.
        void setUnsorted(const unTaggedWord * b){*unsorted = b;}
        // Add b's count to this word's count.
        void addCnt(unTaggedWord * b){cnt += b->cnt;}
#endif
        // Return the stored word (owned by this object).
        const char * itsWord()const{return word;}
        // strcmp-style comparison of this word with b's word.
        int cmpword(unTaggedWord * b)const{return strcmp(word,b->word);}
#if !QSORT
        unTaggedWord * insert(const char * wrd);
#endif
        // Hand this word's base forms over to exactly one of the two
        // destinations: L when there are constructed base forms and the word
        // was not found in the dictionary, otherwise D when there are
        // dictionary base forms.
        void assignTo(basefrm **& D,basefrm **& L)
            {
            if(pbfL && !FoundInDict)
                pbfL->assignTo(L);
            else if(pbfD) // we do not gather statistics from both.
                pbfD->assignTo(D);
            }
        // Number of dictionary base forms as reported by pbfD, or 0 if there are none.
        int countBaseForms() const
            {
            if(pbfD)
                return pbfD->count();
            else
                return 0;
            }
        // Number of constructed base forms as reported by pbfL, or 0 if there are none.
        int countBaseFormsL()const
            {
            if(pbfL)
                return pbfL->count();
            else
                return 0;
            }
        virtual void print()const;
        // Print only if there are constructed base forms and no dictionary base forms.
        virtual void printnew()const
            {
            if(pbfL && !pbfD)
                {
                print();
                }
            }            
        // Print only if both constructed and dictionary base forms are present.
        virtual void printConflict()const
            {
            if(pbfL && pbfD)
                {
                print();
                }
            }
        // Construct a word with count 1 and no base forms. The string 'word'
        // is copied; the caller keeps ownership of its argument.
        unTaggedWord(const char * word
#if QSORT
            ,const unTaggedWord ** unsorted
#endif
            )
            :
#if QSORT
                unsorted(unsorted),
#else
                left(NULL),right(NULL),
#endif
                hasAddedItselfToBaseForm(false),FoundInDict(false),pbfD(NULL),pbfL(NULL),cnt(1)
            {
            this->word = new char[strlen(word) + 1];
            strcpy(this->word,word);
#if !QSORT
            ++reducedtotal;
#endif
            }
        // Release everything this object owns.
        virtual ~unTaggedWord()
            {
            // 'word' was allocated with new[], so it must be released with
            // delete[]; plain delete on an array is undefined behaviour.
            delete[] word;
            delete pbfD;
            delete pbfL;
#if !QSORT
            delete left;
            delete right;
#endif
            }
        // Add a dictionary base form s with type t. If dictionary base forms
        // already exist, the call is forwarded to them and their result is
        // returned; otherwise the first one is created and 1 is returned.
#if PFRQ || FREQ24
        int addBaseFormD(const char * s,const char * t,unsigned int frequency)
            {
            //this->cnt++;
            if(pbfD)
                return pbfD->addBaseForm(s,t,strlen(s),frequency);
            else
                pbfD = new baseformpointer(s,t,strlen(s),frequency);
            return 1;
            }
#else
        int addBaseFormD(const char * s,const char * t)
            {
            //this->cnt++;
            if(pbfD)
                return pbfD->addBaseForm(s,t,strlen(s));
            else
                pbfD = new baseformpointer(s,t,strlen(s));
            return 1;
            }
#endif
        int addBaseFormL(const char * s,const char * t);
        virtual int addBaseFormsL();
        // Plext/nmbr: the dictionary's available lexical information for this word.
        virtual int addBaseFormsDL(lext * Plext,int nmbr,
           bool & conflict,int & cntD,int & cntL);
        // Register this word as a full form with its dictionary and
        // constructed base forms. Only the first call has any effect.
        void addFullForm()
            {
            if(!hasAddedItselfToBaseForm)
                {
                hasAddedItselfToBaseForm = true;
                if(pbfD)
                    pbfD->addFullForm(this);
                if(pbfL)
                    pbfL->addFullForm(this);
                }
            }
        // Forward the disambiguation request to whichever base form sets exist.
        void DissambiguateByLemmaFrequency()
            {
            if(pbfD)
                {
                pbfD->DissambiguateByLemmaFrequency();
                }
            if(pbfL)
                {
                pbfL->DissambiguateByLemmaFrequency();
                }
            }
        // Forward decFreq(this) to whichever base form sets exist.
        void decFreq()
            {
            if(pbfD)
                {
                pbfD->decFreq(this);
                }
            if(pbfL)
                {
                pbfL->decFreq(this);
                }
            }
#if !QSORT
        void lookup(void * arg);
        void assign(void * arg);
#endif
    };

/*
An unTaggedWord that additionally carries a tag. The tag string is copied on
construction, owned by the object and released by the destructor.

NOTE(review): as with the base class, the implicitly generated copy operations
would duplicate the owning 'tag' pointer; callers presumably never copy --
confirm.
*/
class taggedWord : public unTaggedWord
    {
    private:
        char * tag; // Owned copy of the tag, allocated with new[].
        // Write the tag to the shared output stream fp.
        void t() const
            {
            fprintf(fp,"%s",tag);
            }
    protected:
    public:
        static functionTree * Format(char * format);
        static function * getTaggedWordFunction(int character,bool & SortInput,int & testType);
        static function * getTaggedWordFunctionNoBb(int character,bool & SortInput,int & testType);
#if QSORT
        // strcmp-style comparison of this word's tag with b's tag.
        virtual int cmptag(taggedWord * b)const{return strcmp(tag,b->tag);}
#else
        void traverse0T(trav0T);
        taggedWord * insert(const char * w,const char * tag);
#endif
        // Always false in this class.
        virtual bool skip()const
            {
            return false; 
            }
        // Construct a tagged word; both 'word' and 'tag' are copied and the
        // caller keeps ownership of its arguments. The QSORT variant takes
        // const pointers like the other variant does, since neither string is
        // modified.
#if QSORT
        taggedWord(const char * word,const char * tag,const unTaggedWord ** unsorted):unTaggedWord(word,unsorted)
#else
        taggedWord(const char * word,const char * tag):unTaggedWord(word)
#endif
            {
            this->tag = new char[strlen(tag) + 1];
            strcpy(this->tag,tag);
            }
        // Release the tag copy.
        virtual ~taggedWord()
            {
            // 'tag' was allocated with new[], so it must be released with
            // delete[]; plain delete on an array is undefined behaviour.
            delete[] tag;
            }
        virtual int addBaseFormsL();
        // Plext/nmbr: the dictionary's available lexical information for this word.
        virtual int addBaseFormsDL(lext * Plext,int nmbr,
           bool & conflict,int & cntD,int & cntL);
        // Forward the tag-based disambiguation request, with this word's tag,
        // to whichever base form sets exist.
        void DissambiguateByTagFriends()
            {
            if(pbfD)
                {
                pbfD->DissambiguateByTagFriends(tag);
                }
            if(pbfL)
                {
                pbfL->DissambiguateByTagFriends(tag);
                }
            }
    };

#endif
