/* CSTLEMMA - trainable lemmatiser Copyright (C) 2002, 2005, 2009 Center for Sprogteknologi, University of Copenhagen This file is part of CSTLEMMA. CSTLEMMA is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. CSTLEMMA is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with CSTLEMMA; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "comp.h" #include "graph.h" /* ACL 2009 paper: Icelandic 71.3 1.5 even_better (71,30 1,51 iflg. D:\dokumenter\tvärsök\even_better\icelandic.xls) peen 71,51 1,65 sugar 70,93 1,86 affiksFEW3 71,02 2,16 no pruning Danish 92.8 0.2 peen sugar: 92,72 0,19 no pruning Norwegian 87.6 0.3 affiksFEW2 sugar: 86,67 0,68 Greek 90.4 0.4 sugar no pruning Slovene 86.7 0.3 affiksFEW3 affiksFEW2: 86,23 0,58 sugar: 86,27 0,41 peen:86,13 0,55 0,4 Swedish 92.3 0.3 sugar pruning 1 German 91.46 0.17 sugar no pruning English 89.0 1.3 sugar pruning 2 Dutch 90.4 0.5 affiksFEW2 sugar: 90,17 0,31 0,3 no pruning Polish 93.88 0.08 peen sugar: 93,88 0,08 (?) no pruning */ #if _NA // IMPORTANT (20090511) R__NA and W__NA are not updated as sibling rules are // added and eat up the training pairs that earlier siblings did not handle. // This error was detected after having used the weight functions for // the ACL-paper. int comp_fairly_good(const vertex * a,const vertex * b) { //const vertex * a = *(const vertex **)A; //const vertex * b = *(const vertex **)B; //fairly good, Icelandic 71.270883 //AMBI: // French ok 85.767516 ambi1 1.156051 ambi2 0.955414 diff 12.121019 rules 7337.500000 2.731849% cutoff 2 int A1 = a->W__R + a->R__R; int B1 = b->W__R + b->R__R; int A2 = a->W__R + a->W__W + a->R__NA; int B2 = b->W__R + b->W__W + b->R__NA; int A3 = a->W__R + a->R__R + a->R__NA; int B3 = b->W__R + b->R__R + b->R__NA; /* int A2 = a->R__NA - a->W__NA; int B2 = b->R__NA - b->W__NA; int A3 = a->W__R - a->R__W; int B3 = b->W__R - b->R__W; */ return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R + a->R__R;// wr + rr int B1 = b->W__R + b->R__R; int A2 = a->W__R + a->R__R + a->R__NA;// wr + rr + rn - r = wr - rw int B2 = b->W__R + b->R__R + b->R__NA; int A3 = a->W__R + a->W__W + a->R__NA;// wr + ww + rn - w = -wn + rn int B3 = b->W__R + b->W__W + b->R__NA; // int A2 = a->W__R - a->R__W; // int B2 = b->W__R - b->R__W; // int A3 = a->R__NA - a->W__NA; // int B3 = b->R__NA - b->W__NA; return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R + a->R__R + a->R__NA; // Good: previously wrong words got it right. Bad: previously right words got it wrong. int B1 = b->W__R + b->R__R + b->R__NA; int A2 = a->W__R + a->R__R; // Good: any rightly lemmatized word int B2 = b->W__R + b->R__R; int A3 = a->W__R + a->W__W + a->R__NA; // Good: previously right words that didn't match. They may return to the parent. int B3 = b->W__R + b->W__W + b->R__NA; // Bad: previously wrong words that didn't match. They must be handled by siblings. #else int A1 = a->W__R - a->R__W; // Good: previously wrong words got it right. Bad: previously right words got it wrong. int B1 = b->W__R - b->R__W; int A2 = a->W__R + a->R__R; // Good: any rightly lemmatized word int B2 = b->W__R + b->R__R; int A3 = a->W__R + a->W__W - a->R__R - a->R__W; // Good: previously right words that didn't match. They may return to the parent. int B3 = b->W__R + b->W__W - b->R__R - b->R__W; // Bad: previously wrong words that didn't match. They must be handled by siblings. #endif /* int A1 = a->W__R - a->R__W; // Good: previously wrong words got it right. Bad: previously right words got it wrong. int B1 = b->W__R - b->R__W; int A2 = a->W__R + a->R__R; // Good: any rightly lemmatized word int B2 = b->W__R + b->R__R; int A3 = a->R__NA - a->W__NA; // Good: previously right words that didn't match. They may return to the parent. int B3 = b->R__NA - b->W__NA; // Bad: previously wrong words that didn't match. They must be handled by siblings. */ return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__W + a->W__R + a->W__NA + a->R__W + a->R__R + a->R__NA; int A1; int B1; int A2; int B2; int A3; int B3; // good for small numbers: if(N < 3) { A1 = a->W__R + a->R__R; B1 = b->W__R + b->R__R; A2 = a->W__R + a->R__R + a->R__NA; B2 = b->W__R + b->R__R + b->R__NA; A3 = a->W__R + a->W__W + a->R__NA; B3 = b->W__R + b->W__W + b->R__NA; /* A1 = a->W__R + a->R__R; B1 = b->W__R + b->R__R; A2 = a->W__R - a->R__W; B2 = b->W__R - b->R__W; A3 = a->R__NA - a->W__NA; B3 = b->R__NA - b->W__NA; */ } // good for big numbers: else { A1 = a->W__R + a->R__R + a->R__NA; // Good: previously wrong words got it right. Bad: previously right words got it wrong. B1 = b->W__R + b->R__R + b->R__NA; A2 = a->W__R + a->R__R; // Good: any rightly lemmatized word B2 = b->W__R + b->R__R; A3 = a->R__NA + a->W__R + a->W__W; // Good: previously right words that didn't match. They may return to the parent. B3 = b->R__NA + b->W__R + a->W__W; // Bad: previously wrong words that didn't match. They must be handled by siblings. /* A1 = a->W__R - a->R__W; // Good: previously wrong words got it right. Bad: previously right words got it wrong. B1 = b->W__R - b->R__W; A2 = a->W__R + a->R__R; // Good: any rightly lemmatized word B2 = b->W__R + b->R__R; A3 = a->R__NA - a->W__NA; // Good: previously right words that didn't match. They may return to the parent. B3 = b->R__NA - b->W__NA; // Bad: previously wrong words that didn't match. They must be handled by siblings. */ } return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R + 2*a->R__R + a->R__NA; // good: all words that are lemmatised correctly. bad: all previously right words that got it wrong // wr + 2rr + rn - r = ww + rr - rw int B1 = b->W__R + 2*b->R__R + b->R__NA; int A2 = a->W__R + a->R__R + a->R__NA; // wr + rr + rn - r = wr - rw int B2 = b->W__R + b->R__R + b->R__NA; int A3 = a->W__R + a->W__W + a->R__NA; // wr + ww + rn - w = -wn + rn int B3 = b->W__R + b->W__W + b->R__NA; #else int A1 = a->W__R + a->R__R - a->R__W; // good: all words that are lemmatised correctly. bad: all previously right words that got it wrong // wr + 2rr + rn - r = ww + rr - rw int B1 = b->W__R + b->R__R - b->R__W; int A2 = a->W__R - a->R__W; // wr + rr + rn - r = wr - rw int B2 = b->W__R - b->R__W; int A3 = a->W__R + a->W__W - a->R__R - a->R__W; // wr + ww + rn - w = -wn + rn int B3 = b->W__R + b->W__W - b->R__R - b->R__W; #endif /* int A1 = a->W__R + a->R__R - a->R__W; // good: all words that are lemmatised correctly. bad: all previously right words that got it wrong int B1 = b->W__R + b->R__R - b->R__W; int A2 = a->W__R - a->R__W; int B2 = b->W__R - b->R__W; int A3 = a->R__NA - a->W__NA; int B3 = b->R__NA - b->W__NA; */ return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R + a->R__R - a->R__W; // good: all words that are lemmatised correctly. bad: all previously right words that got it wrong int B1 = b->W__R + b->R__R - b->R__W; int A2 = a->W__R - a->R__W; int B2 = b->W__R - b->R__W; int A3 = a->R__NA - a->W__NA; int B3 = b->R__NA - b->W__NA; return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3 25% wrong results) int A1 = a->W__R + a->W__NA + a->R__NA; int B1 = b->W__R + b->W__NA + b->R__NA; int A2 = a->W__R + a->R__R + a->R__NA; int B2 = b->W__R + b->R__R + b->R__NA; int A3 = a->W__R + a->R__NA; int B3 = b->W__R + b->R__NA; return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R + a->R__R; int B1 = b->W__R + b->R__R; int A2 = a->W__R + a->R__R + a->R__NA; int B2 = b->W__R + b->R__R + b->R__NA; int A3 = a->W__R + a->W__NA + a->R__NA; int B3 = b->W__R + b->W__NA + b->R__NA; return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R + a->R__R + a->R__NA; int B1 = b->W__R + b->R__R + b->R__NA; int A2 = a->W__R + a->R__R; int B2 = b->W__R + b->R__R; int A3 = a->W__R + a->W__NA + a->R__NA; int B3 = b->W__R + b->W__NA + b->R__NA; return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R + a->R__R; int B1 = b->W__R + b->R__R; int A2 = a->W__R + a->W__NA + a->R__NA; int B2 = b->W__R + b->W__NA + b->R__NA; int A3 = a->W__R + a->R__R + a->R__NA; int B3 = b->W__R + b->R__R + b->R__NA; return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R + a->R__R; int B1 = b->W__R + b->R__R; int A2 = a->W__R + a->W__NA + a->R__R + a->R__NA; int B2 = b->W__R + b->W__NA + b->R__R + b->R__NA; int A3 = a->W__R + a->R__NA; int B3 = b->W__R + b->R__NA; return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R + a->R__R) + a->R__NA; int B1 = 4*(b->W__R + b->R__R) + b->R__NA; int A2 = a->W__R + a->R__R + a->R__NA; int B2 = b->W__R + b->R__R + b->R__NA; int A3 = a->W__R + a->W__NA + a->R__NA; int B3 = b->W__R + b->W__NA + b->R__NA; return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R + a->R__R) + a->R__NA; // 3wr + 3rr + rn - r = 3wr + 2rr - rw int B1 = 3*(b->W__R + b->R__R) + b->R__NA; int A2 = a->W__R + a->R__R + a->R__NA; //wr + rr + rn - r = wr - rw int B2 = b->W__R + b->R__R + b->R__NA; int A3 = a->W__R + a->W__NA + a->R__NA; // wr + wn + rn - w = -ww + rn int B3 = b->W__R + b->W__NA + b->R__NA; return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R + 2*a->R__R + a->R__NA; // good: all words that are lemmatised correctly. bad: all previously right words that got it wrong // wr - rw + rr int B1 = b->W__R + 2*b->R__R + b->R__NA; int A2 = a->W__R + a->R__R + a->R__NA; // wr - rw int B2 = b->W__R + b->R__R + b->R__NA; // next lines from peen int A3 = a->W__R + a->W__NA + a->R__NA; // -ww + rn int B3 = b->W__R + b->W__NA + b->R__NA; return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R - a->R__W + a->R__R; int AA2 = - a->R__R; int AA3 = - a->W__R - 2*a->W__W;//a->R__NA - a->W__W; int BB1 = b->W__R - b->R__W + b->R__R; int BB2 = - b->R__R; int BB3 = - b->W__R - 2*b->W__W;//b->R__NA - b->W__W; #endif } #endif int comp_honey(const vertex * a,const vertex * b) { //const vertex * a = *(const vertex **)A; //const vertex * b = *(const vertex **)B; // (OK) Dutch 90.179393 +/- 0.589662 at 0.9856 of dataset, 7 iterations, 73324.571429 = 24.642193% rules, cutoff = 0 // (OK) Norwegian 87.272244 +/- 0.267729 at 0.9856 of dataset, 6 iterations, 141038.666667 = 29.880630% rules, cutoff = 0 // (OK) English 88.315152 +/- 1.097312 at 0.9856 of dataset, 3 iterations, 5285.466667 = 7.025276% rules, cutoff=1 // (OK) Icelandic 70.742665 +/- 1.686147 at 0.9856 of dataset, 17 iterations, 29857.000000 = 52.070108% rules, cutoff = 0 // (?) Slovene 86.273367 +/- 0.410931 at 0.9856 of dataset, 9 iterations, 17254.777778 = 8.819297% rules (cutoff = 1) // (?) BEST Greek 90.422464 +/- 0.437009 at 0.9856 of dataset, 5 iterations, 132765.6 = 24.535334% rules, cutoff = 0 // (?) BEST German 91.461918 +/- 0.167574 at 0.9856 of dataset, 7 iterations, 50986 = 16.405554% rules, cutoff = 0 // (?) BEST Swedish 92.265969 +/- 0.277289 at 0.9856 of dataset, 6 iterations, 25935.333333 = 5.506008% rules, cutoff = 1 // (?) Danish 92.585623 +/- 0.171327 at 0.9856 of dataset, 5 iterations, 30422.400000 = 5.576679% rules, cutoff = 1 // (?) BEST Russian 80.815622 +/- 0.450500 at 0.9856 of dataset, 6 iterations, 47079.166667 = 12.084440% rules, cutoff = 1 //AMBI: // French ok 84.477707 ambi1 2.251592 ambi2 1.426752 diff 11.843949 rules 7413.875000 2.760295% cutoff 2 int A1 = a->W__R + 2*a->R__R; int B1 = b->W__R + 2*b->R__R; int A2 = a->W__R + a->R__R; int B2 = b->W__R + b->R__R; int A3 = a->W__R ; int B3 = b->W__R ; return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R + a->R__R) + a->R__NA; int B1 = 2*(b->W__R + b->R__R) + b->R__NA; int A2 = a->W__R + a->R__R + a->R__NA; int B2 = b->W__R + b->R__R + b->R__NA; int A3 = a->W__R + a->W__NA + a->R__NA; int B3 = b->W__R + b->W__NA + b->R__NA; return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3W__R - 5*a->R__W + a->W__W; int B1 = 6*b->W__R - 5*b->R__W + b->W__W; int A2 = a->W__R - 6*a->R__W; int B2 = b->W__R - 6*b->R__W; int A3 = a->R__R - a->W__W; int B3 = b->R__R - b->W__W; return (A1>B1)?-1:(A1B2)?-1:(A2B3)?-1:(A3= exp(N,0.75) 0, 0, 0, 0, // n >= exp(N,0.5) 0, 0, 0, 0, // n >= exp(N,0.25) 0, 0, 0, 0 // n < exp(N,0.25) }; //int D[NPARMS] = {0}; int R[] = // 64 elements { 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1,-1, 0, 0, 1, 1, 0, 0, 1, 0,-1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0,-1, 0, 1,-1, 0, 0, 1, 1, 0, 0, 1, 0,-1, 0, 1, 0, 1, 0, 0, 1,-1, 0, 0, 1, 1 }; void plus(int * dest, int * term,int cols) { for(int col = 0;col < cols;++col) dest[col] += term[col]; } void copy(int * dest,int * source,int cols) { for(int col = 0;col < cols;++col) dest[col] = source[col]; } void betterfound(int Nnodes,const char * extra,int swath,int iterations) { copy(best,parms,NPARMS); FILE * f = fopen(besttxt,"a"); if(f) { fprintf(f,"//iteration:%d.%d\n",swath,iterations); fprintf(f,"/* %d */\n",Nnodes); for(int i = 0;i < NPARMS;++i) { fprintf(f,"%d",parms[i]); if(((i+1) % 4) == 0) if(i == NPARMS - 1) fprintf(f,"\n"); else fprintf(f,",\n"); else fprintf(f,",\t"); } fclose(f); } } int minparmsoff = 0; void copybest() { copy(parms,best,NPARMS); // go on with best result so far. } const char * besttxt; const char * parmstxt = "parms.txt"; int OnlyZeros = NPARMS; bool brown() { static int it = 0; FILE * f = fopen(parmstxt,"a"); assert(f); // D[rand() % NPARMS] += (rand() & 1) ? 1 : -1; int i; int T = it; int R0 = (T % 16) * 4; // row selector in R[]: 0 4 8 12 ... 60 0 4 8 12 ... 60 0 4 ... T /= 16; // strip lowest 4 bits (it / 16) //int P0 = (T % 4) * 4; // row selector in parms. After 16 iterations, the next row is modified: 0 0 0 ... (16x) 4 4 ... (16x) 8 ... (16x) 12 ... (16x) 0 .. int P0 = 12 - (T % 4) * 4; // row selector in parms. After 16 iterations, the previous row is modified: 12 12 12 ... (16x) 8 8 ... (16x) 4 ... (16x) 0 ... (16x) 12 .. T /= 4; // strip 2 bits (it / 64) int fac = (T & 1) ? -1 : 1; // 1 1 1 ... (64x) -1 -1 -1 ... (64x) 1 ... T /= 2; // strip 1 bit (it / 128) if(T * 128 == it) // it = 0 128 256 ... { // double all parms of the best parameter setting and start a new round for(i = 0;i < NPARMS;++i) best[i] *= 2; } /* NOT a good idea. if((it % 16) == 0) // After handling one row, copy(parms,best,NPARMS); // go on with best result so far. */ copybest(); //copy(parms,best,NPARMS); // go on with best result so far. for(i = 0;i < 4;++i) parms[P0+i] += fac*R[R0+i]; // 4N <= R0+i <= 4N+3 ++it; fclose(f); for(i = 0;i < NPARMS; ++i) { if(parms[i]) { minparmsoff = i / 4; minparmsoff *= 4; break; } } return false;//OnlyZeros <= P0; } bool init() { int x2 = 0; int i; for(i = 0;i < NPARMS;++i) { x2 += parms[i]*parms[i]; } if(x2 == 0) { copy(parms,best,NPARMS); } return true; } int pcnt[(NPARMS >> 2)+1] = {0,0,0,0,0}; void onlyZeros() { OnlyZeros = 0; for(unsigned int i = 0;i < sizeof(pcnt)/sizeof(pcnt[0]);++i) { if(pcnt[i] != 0) { OnlyZeros = (i+1) << 2; pcnt[i] = 0; } } FILE * f = fopen(parmstxt,"a"); assert(f); fprintf(f,"//OnlyZeros %d \n",OnlyZeros); fprintf(f,"//suffix only %s \n",::suffixonly ? "yes" : "no"); fclose(f); } void printparms(int Nnodes) { int i; FILE * f = fopen(parmstxt,"a"); assert(f); fprintf(f,"/* %d */\n",Nnodes); fprintf(f," {\n "); for(i = 0;i < NPARMS;++i) { fprintf(f,"%5d",parms[i]); if(((i+1) % 4) == 0) { if(i == NPARMS - 1) fprintf(f," //%d\n ",pcnt[i >> 2]); else fprintf(f,", //%d\n ",pcnt[i >> 2]); } else fprintf(f,","); } fprintf(f," //%9d\n",pcnt[i >> 2]); fprintf(f," }\n"); fclose(f); } int comp_parms(const vertex * a,const vertex * b) { //for(int o = 0;o < NPARMS;o += 4) if( a->R__R != b->R__R || a->W__R != b->W__R || a->R__W != b->R__W || a->W__W != b->W__W ) { int off = minparmsoff; if(off < parmsoff) off = parmsoff; for(int o = off;o < NPARMS;o += 4) { int A = parms[o]*a->R__R + parms[o+1]*a->W__R + parms[o+2]*a->R__W + parms[o+3]*a->W__W; int B = parms[o]*b->R__R + parms[o+1]*b->W__R + parms[o+2]*b->R__W + parms[o+3]*b->W__W; if(A != B) { ++pcnt[o >> 2]; // For counting the number of times the first, second, third or fourth condition has been used. // (Hypothesis: with parms as doubles the first condition is used and only in very special cases the second. // Addendum: This hypothesis holds.) return A > B ? -1 : 1; } } ++pcnt[NPARMS >> 2]; } return 0; } int comp_parms_off(const vertex * a,const vertex * b) { if( a->R__R != b->R__R || a->W__R != b->W__R || a->R__W != b->R__W || a->W__W != b->W__W ) { //for(int o = 0;o < NPARMS;o += 4) int off = minparmsoff; if(off < parmsoff) off = parmsoff; for(int o = off;o < NPARMS;o += 4) { int A = parms[o]*a->R__R + parms[o+1]*a->W__R + parms[o+2]*a->R__W + parms[o+3]*a->W__W; int B = parms[o]*b->R__R + parms[o+1]*b->W__R + parms[o+2]*b->R__W + parms[o+3]*b->W__W; if(A != B) { ++pcnt[o >> 2]; // For counting the number of times the first, second, third or fourth condition has been used. // (Hypothesis: with parms as doubles the first condition is used and only in very special cases the second. // Addendum: This hypothesis holds.) return A > B ? -1 : 1; } } ++pcnt[NPARMS >> 2]; } return 0; } static int nparms = 0; int comp_parms0(const vertex * a,const vertex * b) { for(int o = 0;o < nparms;o += 4) { int A = parms[o]*a->R__R + parms[o+1]*a->W__R + parms[o+2]*a->R__W + parms[o+3]*a->W__W; int B = parms[o]*b->R__R + parms[o+1]*b->W__R + parms[o+2]*b->R__W + parms[o+3]*b->W__W; if(A != B) { return A > B ? -1 : 1; } } return 0; } int comp_parms0_off(const vertex * a,const vertex * b) { int off = minparmsoff; if(off < parmsoff) off = parmsoff; for(int o = off;o < nparms;o += 4) { int A = parms[o]*a->R__R + parms[o+1]*a->W__R + parms[o+2]*a->R__W + parms[o+3]*a->W__W; int B = parms[o]*b->R__R + parms[o+1]*b->W__R + parms[o+2]*b->R__W + parms[o+3]*b->W__W; if(A != B) { return A > B ? -1 : 1; } } return 0; } struct funcstruct { bool compute_parms; char * number; char * name; int (*comp)(const vertex * a,const vertex * b); }; struct funcstruct funcstructs[] = { {true,"0","parms",comp_parms_off}, //makeaffix.exe mydata.txt 0 affixrules XX 123 parms #if _NA {false,"1","fairly_good",comp_fairly_good}, {false,"2","even_better",comp_even_better}, {false,"3","affiksFEW3",comp_affiksFEW3}, {false,"4","affiksFEW",comp_affiksFEW}, {false,"5","affiksFEW2",comp_affiksFEW2}, {false,"6","fixNA",comp_fixNA}, {false,"7","fruit",comp_fruit}, {false,"8","ice",comp_ice}, {false,"9","pisang",comp_pisang}, {false,"10","kiwi",comp_kiwi}, {false,"11","carrot",comp_carrot}, {false,"12","peen",comp_peen}, {false,"13","beet",comp_beet}, {false,"14","sugar",comp_sugar}, {false,"15","affiksFEW2org",comp_affiksFEW2org}, #endif {false,"16","honey",comp_honey}, {false,"17","koud",comp_koud}, {false,"18","parms0",comp_parms0}, {false,"19","parmsoff",comp_parms0_off}, {false,0,0,0} }; bool setCompetitionFunction(const char * functionname,const char * extra,bool simple) { int i; for(i = 0;funcstructs[i].number;++i) if(!strcmp(functionname,funcstructs[i].number) || !strcmp(functionname,funcstructs[i].name)) { comp = funcstructs[i].comp; if(comp == comp_parms_off && simple) comp = comp_parms; if(VERBOSE) { if(comp == comp_parms) { printf("comp == comp_parms\n"); } else if(comp == comp_parms_off) { printf("comp == comp_parms_off\n"); } } compute_parms = funcstructs[i].compute_parms; if(comp == comp_parms0 || comp == comp_parms0_off) { /* bool useoffset; bool suffixonly; char * langbase; int rows; int val[NPARMS]; */ for(unsigned int j = 0;j < sizeof(bests)/sizeof(bests[0]);++j) { if(bests[j].suffixonly == ::suffixonly && !strncmp(bests[j].langbase,extra,strlen(bests[j].langbase))) { printf("bests[%d].suffixonly == [%s] bests[%d].langbase == [%s]\n",j,bests[j].suffixonly ? "true" : "false",j,bests[j].langbase); if(bests[j].useoffset) { printf("comp = comp_parms0_off\n"); comp = comp_parms0_off; } else { printf("comp = comp_parms0\n"); comp = comp_parms0; } printf("bests[%d].rows == [%d]\n",j,bests[j].rows); nparms = bests[j].rows * ROWPARMS; for(int k = 0;k < nparms;++k) parms[k] = bests[j].val[k]; break; } } if(nparms == 0) { fprintf(stderr,"No parameters defined for \"%s\"\nChoose one of:\n",extra); for(unsigned int j = 0;j < sizeof(bests)/sizeof(bests[0]);++j) { fprintf(stderr,"\t%s %s\n",bests[j].langbase,bests[j].suffixonly ? "suffix":"affix"); } fprintf(stderr,"Or find optimal parameters for %s and put these in comp.cpp.\n",extra); getchar(); exit(-1); } if(VERBOSE) { if(comp == comp_parms0) printf("comp_parms0\n"); else printf("comp_parms0_off\n"); } } for(i = 0;i < nparms; ++i) { if(parms[i]) { minparmsoff = i / 4; minparmsoff *= 4; break; } } if(VERBOSE) printf("minparmsoff = %d \n",minparmsoff); return true; } return false; }