/*
CSTLEMMA - trainable lemmatiser using word-end inflectional rules

Copyright (C) 2002, 2004  Center for Sprogteknologi, University of Copenhagen

This file is part of CSTLEMMA.

CSTLEMMA is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

CSTLEMMA is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with CSTLEMMA; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
#include "basefrm.h"
#include "basefrmpntr.h"
#include "functio.h"
#include "functiontree.h"
#include "word.h"
#include "tags.h"
#include <assert.h>
#include <stdlib.h>

// Static state shared by all basefrm objects.
int basefrm::index = 0;// reset to 0 by setFile()
functionTree * basefrm::bfuncs = NULL;// built by setFormat() from its bformat argument; used by printb()
functionTree * basefrm::Bfuncs = NULL;// built by setFormat() from its Bformat argument; used by printB()
functionTree * basefrm::wfuncs = NULL;// built by setFormat() from Wformat, only if the W field is used (hasW)
const char * basefrm::sep;// written between full forms by L(); not assigned anywhere in this part of the file
bool basefrm::hasW = false;// set to true by getBasefrmFunction() when it sees the 'W' field
FILE * basefrm::fp = NULL;// output stream, installed by setFile()
tagpairs * TagFriends = NULL;// consulted by basefrm::Closeness() when the tags are not identical

/* Installs the output stream.
   The same stream is stored in basefrm::fp and in functionString::fp,
   and the static counter basefrm::index is reset to zero.
   Note that the parameter shadows the static member of the same name,
   hence the explicit qualification below. */
void basefrm::setFile(FILE * fp)
    {
    index = 0;
    functionString::fp = fp;
    basefrm::fp = fp;
    }

/* Translates one field character of a base form format into a newly
   allocated function object.

   character       the field letter: 'f', 'n' (only if FREQ24), 'w', 'W' or 't'
   DummySortInput  not used by this function
   testType        NUMBERTEST is OR-ed into it when the 'W' field is seen

   Returns NULL for any other character. Seeing 'W' also sets the static
   flag basefrm::hasW. The 'p' field (basefrm::P) is currently disabled. */
function * basefrm::getBasefrmFunction(int character,bool & DummySortInput,int & testType)
    {
    if(character == 'f')
        return new functionNoArgB(&basefrm::F);
#if FREQ24
    if(character == 'n')
        return new functionNoArgB(&basefrm::N);
#endif
    if(character == 'w')
        return new functionNoArgB(&basefrm::W);
    if(character == 'W')
        {
        // The full form list is requested: remember that, and flag the
        // number test so that the count of full forms can be tested.
        hasW = true;
        testType |= NUMBERTEST;
        return new functionNoArgW(&basefrm::L,&basefrm::countFullForms);
        }
    if(character == 't')
        return new functionNoArgB(&basefrm::T);
    return NULL;
    }

/* Same mapping as getBasefrmFunction, but without the 'W' field.

   character       the field letter: 'f', 'n' (only if FREQ24), 'w' or 't'
   DummySortInput  not used by this function
   testType        not used by this function

   Returns a newly allocated function object, or NULL if the character is
   not one of the letters above. The 'p' field (basefrm::P) is currently
   disabled. */
function * basefrm::getBasefrmFunctionNoW(int character,bool & DummySortInput,int & testType)
    {
    if(character == 'f')
        return new functionNoArgB(&basefrm::F);
#if FREQ24
    if(character == 'n')
        return new functionNoArgB(&basefrm::N);
#endif
    if(character == 'w')
        return new functionNoArgB(&basefrm::W);
    if(character == 't')
        return new functionNoArgB(&basefrm::T);
    return NULL;
    }

/* Builds a function tree for a base form format that may not contain
   the 'W' field (the fields are resolved by getBasefrmFunctionNoW).

   format  the format string handed to OutputClass::Format

   Returns a tree allocated with new; nothing in this function deletes
   it, so it is handed to the caller as is. */
functionTree * basefrm::Format(const char * format)
    {
    functionTree * ret = new functionTree();
    // Passed by reference to OutputClass::Format. Initialised so that the
    // callee never sees an indeterminate value, as setFormat() also does.
    bool DummySortInput = false;
    int testType = 0;
    OutputClass::Format(format,getBasefrmFunctionNoW,*ret,format,DummySortInput,testType);
/*    printf("\nFormat %s:",format);
    ret->print();
    printf("\n");*/
    return ret;
    }

/* Hands this base form over to 'other' by calling owner.reassign(other).
   NOTE(review): presumably this makes whatever refers to this base form
   refer to 'other' instead - confirm against the type of 'owner'. */
void basefrm::getAbsorbedBy(basefrm * other)
    {
    owner.reassign(other);
    }

/* Releases the three arrays held by this object: s, t and the array of
   full form pointers. Only the pointer array itself is released here;
   the words that its elements point to are not deleted. */
basefrm::~basefrm()
    {
    delete [] fullForm;
    delete [] t;
    delete [] s;
    }

/* Appends 'word' to the list of full forms of this base form.

   word  the full form to add; only the pointer is stored.

   The pointer array is reallocated with room for one more element.
   The old array is always released, also when the list is currently
   empty: removeFullForm() can bring nfullForm down to zero while
   fullForm still points to an allocated array, which used to be leaked
   here. (delete [] of a null pointer is a no-op, and the destructor
   already deletes fullForm unconditionally.)
   nfullForm is only incremented after the allocation has succeeded. */
void basefrm::addFullForm(unTaggedWord * word)
    {
    assert(basefrm::hasW);
    unTaggedWord ** nwlist = new unTaggedWord *[nfullForm + 1];
    for(unsigned int i = 0;i < nfullForm;++i)
        nwlist[i] = fullForm[i];
    nwlist[nfullForm] = word;
    delete [] fullForm;
    fullForm = nwlist;
    ++nfullForm;
    }

/* Writes all full forms of this base form.
   Each full form is printed with the function tree wfuncs; the string
   sep is written to fp between two consecutive full forms (not before
   the first one and not after the last one). */
void basefrm::L() const
    {
    assert(basefrm::hasW);
    unsigned int i = 0;
    while(i < nfullForm)
        {
        if(i > 0)
            fprintf(fp,"%s",sep);
        wfuncs->printIt(fullForm[i]);
        ++i;
        }
    }

/* Builds the static function trees that are used for output.

   Wformat       format for full forms; required if the W field occurs
                 in bformat or Bformat
   bformat       if not NULL, parsed into bfuncs
   Bformat       if not NULL, parsed into Bfuncs
   InputHasTags  selects taggedWord::getTaggedWordFunctionNoBb (true) or
                 unTaggedWord::getUnTaggedWordFunctionNoBb (false) for
                 resolving the fields of Wformat

   Returns false if the W field was not seen; otherwise the value that
   OutputClass::Format left in the sort flag while parsing Wformat.

   If the W field is used but Wformat is NULL, a message is printed and
   the program terminates.
   NOTE(review): that error path calls exit(0), i.e. it reports success
   to the calling process - confirm that this is intended. */
bool basefrm::setFormat(char * Wformat,const char * bformat,const char * Bformat,bool InputHasTags)
    {
    if(bformat)
        {
        bool ignoredSort = false;
        int ignoredTest = 0;
        bfuncs = new functionTree();
        OutputClass::Format(bformat,basefrm::getBasefrmFunction,*bfuncs,bformat,ignoredSort,ignoredTest);
        }

    if(Bformat)
        {
        bool ignoredSort = false;
        int ignoredTest = 0;
        Bfuncs = new functionTree();
        OutputClass::Format(Bformat,basefrm::getBasefrmFunction,*Bfuncs,Bformat,ignoredSort,ignoredTest);
        }

    // hasW has been set during the parsing above if the W field occurred.
    if(!hasW)
        return false;

    if(!Wformat)
        {
        printf("You need to specify -W<full form format> if you use the $W variable in -B<base form format> or -b<base form format>\n");
        exit(0);
        }

    bool SortInput = false;
    int testType = 0;
    wfuncs = new functionTree();
    if(InputHasTags)
        OutputClass::Format(Wformat,taggedWord::getTaggedWordFunctionNoBb,*wfuncs,Wformat,SortInput,testType);
    else
        OutputClass::Format(Wformat,unTaggedWord::getUnTaggedWordFunctionNoBb,*wfuncs,Wformat,SortInput,testType);
    return SortInput;
    }

/* Writes this base form using the function tree bfuncs.
   bfuncs is dereferenced without a check; it is only created by
   setFormat() when that function is given a non-NULL bformat. */
void basefrm::printb()const
    {
//    testPrint();
    bfuncs->printIt(this);
    }

/* Writes this base form using the function tree Bfuncs.
   Bfuncs is dereferenced without a check; it is only created by
   setFormat() when that function is given a non-NULL Bformat. */
void basefrm::printB()const
    {
    Bfuncs->printIt(this);
    }

/* Returns the number of full forms currently registered with this
   base form. Asserts that the W field is in use. */
int basefrm::countFullForms() const
    {
    assert(basefrm::hasW);
    return static_cast<int>(nfullForm);
    }

/* Merges the full forms of 'other' into the list of this base form.

   other  the base form whose full forms are added. Its own pointer
          array and count are left as they are.

   The two lists are interleaved by comparing their front elements with
   cmpword(); when the elements compare equal (or the one from 'other'
   is smaller) the element from 'other' is taken first. Nothing is
   removed, so the resulting list has nfullForm + other->nfullForm
   elements.
   NOTE(review): this looks like the merge step of a merge sort, so
   presumably both lists are already ordered by cmpword() - confirm.

   If FREQ24 is defined, freq24 of 'other' is added to freq24 of this
   object as well. */
void basefrm::addFullForms(basefrm * other)
    {
    assert(basefrm::hasW);
    int total = nfullForm + other->nfullForm;
    if(total != 0)
        {
        unTaggedWord ** merged = new unTaggedWord *[total];
        unsigned int mine = 0;
        unsigned int theirs = 0;
        unsigned int out = 0;
        while(mine < nfullForm && theirs < other->nfullForm)
            {
            if(fullForm[mine]->cmpword(other->fullForm[theirs]) < 0)
                merged[out++] = fullForm[mine++];
            else
                merged[out++] = other->fullForm[theirs++];
            }
        // At most one of the two lists still has elements left.
        while(mine < nfullForm)
            merged[out++] = fullForm[mine++];
        while(theirs < other->nfullForm)
            merged[out++] = other->fullForm[theirs++];
        delete [] fullForm;
        fullForm = merged;
        nfullForm = total;
        }
#if FREQ24
    freq24 += other->freq24;
#endif
    }

/* Returns the sum of itsCnt() over all full forms of this base form.
   The result is 0 if there are no full forms. */
int basefrm::lemmaFreq() const
    {
    assert(basefrm::hasW);
    int total = 0;
    unsigned int idx = 0;
    while(idx < nfullForm)
        {
        total += fullForm[idx]->itsCnt();
        ++idx;
        }
    return total;
    }

/* Removes the first occurrence of 'w' from the list of full forms.

   w  the full form to remove; compared by pointer identity.

   The elements after the removed one are moved one position down and
   nfullForm is decremented. The pointer array is not reallocated and
   the word itself is not deleted. Nothing happens if 'w' is not in the
   list.

   The shifting loop tests its condition before the first copy. With the
   previous do-while form, removing the last element read fullForm[i+1],
   which lies one past the end of the array. */
void basefrm::removeFullForm(unTaggedWord * w)
    {
    assert(basefrm::hasW);
    for(unsigned int i = 0;i < nfullForm;++i)
        {
        if(fullForm[i] == w)
            {
            --nfullForm;
            // i + 1 is at most the old last index, so this stays in bounds.
            for(;i < nfullForm;++i)
                fullForm[i] = fullForm[i+1];
            return;
            }
        }
    }

/* Tells how close 'tag' is to the tag t of this base form.

   tag  the tag to compare with

   Returns 0 if the two tags are identical strings. Otherwise the answer
   is whatever TagFriends->Closeness(tag,t) returns, or -1 if no
   TagFriends table is installed. */
int basefrm::Closeness(const char * tag)
    {
    if(strcmp(tag,t) == 0)
        return 0;

    if(TagFriends == NULL)
        return -1;

    return TagFriends->Closeness(tag,t);
    }

/* Debugging aid: writes s and t to standard output, separated by a
   slash and followed by a newline. */
void basefrm::testPrint()const
    {
    printf("%s/%s\n",s,t);
    }


/*
void basefrm::remove(baseformpointer * tobermoved)
    {
    for(int i = 0;i < nfullForm;++i)
        fullForm[i]->remove(tobermoved);
        if(fullForm[i] == tobermoved)
            {
            unTaggedWord ** nwlist = new unTaggedWord *[nfullForm - 1];
            for(j = i+1; j < nfullForm;++j)
                nwlist[j-1] = fullForm[j];
            while(--i >= 0)
                nwlist[i] = fullForm[i];
            delete [] fullForm;
            fullForm = nwlist;
            delete tobermoved;
            return;
            }
    }
    */
