%{
/*
 * Suffix-stripping stemmer written as a flex scanner.
 *
 * How it works:
 *   - STEP0 reads one lowercase word and pushes it back onto the input in
 *     reverse order, so that word *suffixes* become *prefixes* that ordinary
 *     flex patterns can match.
 *   - Every later step matches against the reversed word, uses yyless(n) to
 *     drop the first n reversed characters (i.e. the last n characters of the
 *     real word), optionally unput()s replacement characters, and then
 *     switches to the next step with BEGIN().
 *   - STEP6 prints the (re-reversed) result followed by a newline.
 *
 * Steps that carry a condition on the stem are split in two states: the
 * parent state (e.g. STEP2) recognises the suffix and jumps to a sub-state
 * (e.g. STEP21); the sub-state re-matches the suffix together with the stem
 * condition.  If the condition fails, the catch-all {W}* rule of the group
 * forwards the untouched word to the next step.  Rule order matters: all
 * candidates match the whole word, so flex picks the first one listed.
 *
 * NOTE(review): the start-condition prefixes (<STEPx>) were reconstructed
 * from the BEGIN() targets and the rule order; in particular the state lists
 * on the catch-all rules and on the newline rule should be confirmed against
 * the intended design.
 */
#include <iostream>
#include <string>
%}

%option noyywrap

%x STEP0
%x STEP1a STEP1b STEP1b1 STEP1b2 STEP1b3 STEP1c STEP1D STEP1E
%x STEP2 STEP21 STEP22 STEP23 STEP24 STEP25 STEP26 STEP27 STEP28 STEP29 STEP291 STEP292 STEP293
%x STEP3 STEP31 STEP32 STEP33 STEP34 STEP35
%x STEP4 STEP41 STEP42 STEP43 STEP44 STEP45
%x STEP5a STEP5a1 STEP5b
%x STEP6

/* All patterns below are applied to the REVERSED word. */
/* C:   a run of letters outside [aeiouy] ending in a letter outside [aeiou]
        (so only the last letter of the run, i.e. the earliest one in the
        real word, may be 'y'). */
C           ([a-z]{-}[aeiouy])*([a-z]{-}[aeiou])
/* Ve:  one or more of a/e/i/o/u ('y' excluded); used at the end of the
        reversed stem. */
Ve          [aeiou]+
/* VbC: one or more of a/e/i/o/u/y; used where a {C} run follows. */
VbC         [aeiouy]+
/* W:   any single lowercase letter. */
W           [a-z]
/* Stem shapes used for the (m=1), (m>0) and (m>1) conditions quoted in the
   rule comments. */
M_ONE       {VbC}?{C}({VbC}{C}|{Ve})
M_GT_ZERO   {VbC}?(({C}{VbC})+{C}|({C}{VbC})*{C}{Ve})
M_GT_ONE    {VbC}?(({C}{VbC})+{C}{Ve}|({C}{VbC}){2,}{C})

%%

<STEP0>{W}* {
    /* Reverse the token.  unput() may overwrite yytext, so work on a copy;
       pushing the characters back first-to-last makes them come out
       last-to-first on the next read. */
    const std::string word(yytext, yyleng);
    for (std::string::size_type i = 0; i < word.size(); ++i) {
        unput(word[i]);
    }
    /* Words of one or two letters are printed unchanged. */
    if (word.size() <= 2) {
        BEGIN(STEP6);
    } else {
        BEGIN(STEP1a);
    }
}

<STEP1a>(sess|sei){W}* {
    /* SSES -> SS; IES -> I */
    yyless(2);
    BEGIN(STEP1b);
}
<STEP1a>ss{W}* {
    /* SS -> SS */
    yyless(0);
    BEGIN(STEP1b);
}
<STEP1a>s{W}* {
    /* S -> */
    yyless(1);
    BEGIN(STEP1b);
}
<STEP1a>{W}* {
    yyless(0);
    BEGIN(STEP1b);
}

<STEP1b>dee{W}* {
    yyless(0);
    BEGIN(STEP1b1);
}
<STEP1b>gni{W}* {
    yyless(0);
    BEGIN(STEP1b2);
}
<STEP1b>de{W}* {
    yyless(0);
    BEGIN(STEP1b3);
}
<STEP1b1>dee{M_GT_ZERO} {
    /* (m>0) EED -> EE */
    yyless(1);
    BEGIN(STEP1c);
}
<STEP1b2>gni{W}*({Ve}|{VbC}{C}) {
    /* (*v*) ING -> */
    yyless(3);
    BEGIN(STEP1D);
}
<STEP1b3>de{W}*({Ve}|{VbC}{C}) {
    /* (*v*) ED -> */
    yyless(2);
    BEGIN(STEP1D);
}
<STEP1b,STEP1b1,STEP1b2,STEP1b3>{W}* {
    yyless(0);
    BEGIN(STEP1c);
}

<STEP1D>(ta|lb|zi){W}* {
    /* AT -> ATE; BL -> BLE; IZ -> IZE */
    yyless(0);
    unput('e');
    BEGIN(STEP1c);
}
<STEP1D>{C}{W}* {
    /* (*d and not (*L or *S or *Z)) -> single letter */
    if (yytext[0] == yytext[1] && yytext[0] != 'l' && yytext[0] != 's' && yytext[0] != 'z') {
        yyless(1);
        BEGIN(STEP1c);
    } else {
        yyless(0);
        BEGIN(STEP1E);
    }
}
<STEP1E>([a-z]{-}[aeiouwxy])[aeiouy]{C} {
    /* (m=1 and *o) -> E */
    yyless(0);
    unput('e');
    BEGIN(STEP1c);
}
<STEP1D,STEP1E>{W}* {
    yyless(0);
    BEGIN(STEP1c);
}

<STEP1c>y{W}*({Ve}|{VbC}{C}) {
    /* (*v*) Y -> I */
    yyless(1);
    unput('i');
    BEGIN(STEP2);
}
<STEP1c>{W}* {
    yyless(0);
    BEGIN(STEP2);
}

<STEP2>(lanoita|noitazi){W}* {
    yyless(0);
    BEGIN(STEP21);
}
<STEP21>(lanoita|noitazi){M_GT_ZERO} {
    /* (m>0) ATIONAL -> ATE; (m>0) IZATION -> IZE */
    yyless(5);
    unput('e');
    BEGIN(STEP3);
}
<STEP2>(ssenevi|ssenluf|ssensou){W}* {
    yyless(0);
    BEGIN(STEP22);
}
<STEP22>(ssenevi|ssenluf|ssensou){M_GT_ZERO} {
    /* (m>0) IVENESS -> IVE; (m>0) FULNESS -> FUL; (m>0) OUSNESS -> OUS */
    yyless(4);
    BEGIN(STEP3);
}
<STEP2>(lanoit|ilsuo|iltne){W}* {
    yyless(0);
    BEGIN(STEP23);
}
<STEP23>(lanoit|ilsuo|iltne){M_GT_ZERO} {
    /* (m>0) TIONAL -> TION; (m>0) OUSLI -> OUS; (m>0) ENTLI -> ENT */
    yyless(2);
    BEGIN(STEP3);
}
<STEP2>itilib{W}* {
    yyless(0);
    BEGIN(STEP24);
}
<STEP24>itilib{M_GT_ZERO} {
    /* (m>0) BILITI -> BLE */
    yyless(5);
    unput('l');
    unput('e');
    BEGIN(STEP3);
}
<STEP2>(noita|itivi){W}* {
    yyless(0);
    BEGIN(STEP25);
}
<STEP25>(noita|itivi){M_GT_ZERO} {
    /* (m>0) ATION -> ATE; (m>0) IVITI -> IVE */
    yyless(3);
    unput('e');
    BEGIN(STEP3);
}
<STEP2>(msila|itila){W}* {
    yyless(0);
    BEGIN(STEP26);
}
<STEP26>(msila|itila){M_GT_ZERO} {
    /* (m>0) ALISM -> AL; (m>0) ALITI -> AL */
    yyless(3);
    BEGIN(STEP3);
}
<STEP2>(icne|icna|igol){W}* {
    yyless(0);
    BEGIN(STEP27);
}
<STEP27>(icne|icna){M_GT_ZERO} {
    /* (m>0) ENCI -> ENCE; (m>0) ANCI -> ANCE */
    yyless(1);
    unput('e');
    BEGIN(STEP3);
}
<STEP27>igol{M_GT_ZERO} {
    /* extra: (m>0) LOGI -> LOG.  Kept separate from ENCI/ANCI because the
       trailing 'i' is dropped without being replaced by 'e'. */
    yyless(1);
    BEGIN(STEP3);
}
<STEP2>rezi{W}* {
    yyless(0);
    BEGIN(STEP28);
}
<STEP28>rezi{M_GT_ZERO} {
    /* (m>0) IZER -> IZE */
    yyless(1);
    BEGIN(STEP3);
}
<STEP2>illa{W}* {
    yyless(0);
    BEGIN(STEP29);
}
<STEP29>illa{M_GT_ZERO} {
    /* (m>0) ALLI -> AL */
    yyless(2);
    BEGIN(STEP3);
}
<STEP2>rota{W}* {
    yyless(0);
    BEGIN(STEP291);
}
<STEP291>rota{M_GT_ZERO} {
    /* (m>0) ATOR -> ATE */
    yyless(2);
    unput('e');
    BEGIN(STEP3);
}
<STEP2>ilb{W}* {
    yyless(0);
    BEGIN(STEP292);
}
<STEP292>ilb{M_GT_ZERO} {
    /* (m>0) BLI -> BLE (instead of (m>0) ABLI -> ABLE) */
    yyless(1);
    unput('e');
    BEGIN(STEP3);
}
<STEP2>ile{W}* {
    yyless(0);
    BEGIN(STEP293);
}
<STEP293>ile{M_GT_ZERO} {
    /* (m>0) ELI -> E */
    yyless(2);
    BEGIN(STEP3);
}
<STEP2,STEP21,STEP22,STEP23,STEP24,STEP25,STEP26,STEP27,STEP28,STEP29,STEP291,STEP292,STEP293>{W}* {
    yyless(0);
    BEGIN(STEP3);
}

<STEP3>(etaci|ezila|itici){W}* {
    yyless(0);
    BEGIN(STEP31);
}
<STEP31>(etaci|ezila|itici){M_GT_ZERO} {
    /* (m>0) ICATE -> IC; (m>0) ALIZE -> AL; (m>0) ICITI -> IC */
    yyless(3);
    BEGIN(STEP4);
}
<STEP3>evita{W}* {
    yyless(0);
    BEGIN(STEP32);
}
<STEP32>evita{M_GT_ZERO} {
    /* (m>0) ATIVE -> */
    yyless(5);
    BEGIN(STEP4);
}
<STEP3>laci{W}* {
    yyless(0);
    BEGIN(STEP33);
}
<STEP33>laci{M_GT_ZERO} {
    /* (m>0) ICAL -> IC */
    yyless(2);
    BEGIN(STEP4);
}
<STEP3>ssen{W}* {
    yyless(0);
    BEGIN(STEP34);
}
<STEP34>ssen{M_GT_ZERO} {
    /* (m>0) NESS -> */
    yyless(4);
    BEGIN(STEP4);
}
<STEP3>(luf){W}* {
    yyless(0);
    BEGIN(STEP35);
}
<STEP35>(luf){M_GT_ZERO} {
    /* (m>0) FUL -> */
    yyless(3);
    BEGIN(STEP4);
}
<STEP3,STEP31,STEP32,STEP33,STEP34,STEP35>{W}* {
    yyless(0);
    BEGIN(STEP4);
}

<STEP4>tneme{W}* {
    yyless(0);
    BEGIN(STEP41);
}
<STEP41>tneme{M_GT_ONE} {
    /* (m>1) EMENT -> */
    yyless(5);
    BEGIN(STEP5a);
}
<STEP4>(ecna|ecne|elba|elbi|tnem){W}* {
    yyless(0);
    BEGIN(STEP42);
}
<STEP42>(ecna|ecne|elba|elbi|tnem){M_GT_ONE} {
    /* (m>1) ANCE -> ; (m>1) ENCE -> ; (m>1) ABLE -> ; (m>1) IBLE -> ; (m>1) MENT -> */
    yyless(4);
    BEGIN(STEP5a);
}
<STEP4>(tna|tne|msi|eta|iti|suo|evi|ezi){W}* {
    yyless(0);
    BEGIN(STEP43);
}
<STEP43>(tna|tne|msi|eta|iti|suo|evi|ezi){M_GT_ONE} {
    /* (m>1) ANT -> ; (m>1) ENT -> ; (m>1) ISM -> ; (m>1) ATE -> ;
       (m>1) ITI -> ; (m>1) OUS -> ; (m>1) IVE -> ; (m>1) IZE -> */
    yyless(3);
    BEGIN(STEP5a);
}
<STEP4>noi{W}* {
    yyless(0);
    BEGIN(STEP44);
}
<STEP44>noi[st]{C}?{VbC}({C}{VbC})*{C}({Ve}|{VbC}{C}) {
    /* (m>1 and (*S or *T)) ION -> */
    yyless(3);
    BEGIN(STEP5a);
}
<STEP4>(la|re|ci|uo){W}* {
    yyless(0);
    BEGIN(STEP45);
}
<STEP45>(la|re|ci|uo){M_GT_ONE} {
    /* (m>1) AL -> ; (m>1) ER -> ; (m>1) IC -> ; (m>1) OU -> */
    yyless(2);
    BEGIN(STEP5a);
}
<STEP4,STEP41,STEP42,STEP43,STEP44,STEP45>{W}* {
    yyless(0);
    BEGIN(STEP5a);
}

<STEP5a>e{M_GT_ONE} {
    /* (m>1) E -> */
    yyless(1);
    BEGIN(STEP5b);
}
<STEP5a>e{M_ONE} {
    /* (m=1 and not *o) E -> ; part "m=1" */
    yyless(0);
    BEGIN(STEP5a1);
}
<STEP5a>{W}* {
    yyless(0);
    BEGIN(STEP5b);
}
<STEP5a1>e([a-z]{-}[aeiouwxy])[aeiouy]{C}{W}* {
    /* (m=1 and not *o) E -> ; part "*o": the stem ends in *o, keep the E */
    yyless(0);
    BEGIN(STEP5b);
}
<STEP5a1>{W}* {
    /* (m=1 and not *o) E -> ; part "not *o": drop the E */
    yyless(1);
    BEGIN(STEP5b);
}

<STEP5b>ll{C}?{VbC}({C}{VbC})*{C}({Ve}|{VbC}{C}) {
    /* (m > 1 and *d and *L) -> single letter */
    yyless(1);
    BEGIN(STEP6);
}
<STEP5b>{W}* {
    yyless(0);
    BEGIN(STEP6);
}

<STEP6>{W}* {
    /* The token is still reversed: emit it back-to-front to restore the
       natural order.  No unput() happens here, so yytext can be read
       directly without copying. */
    for (int i = yyleng - 1; i >= 0; --i) {
        std::cout << yytext[i];
    }
    std::cout << std::endl;
}

<STEP6>[\n\r]+ {
    /* End of line: get ready for the next word. */
    BEGIN(STEP0);
}

%%

/* Runs the scanner over standard input, starting in the word-reversal state. */
int main() {
    BEGIN(STEP0);
    yylex();
    return 0;
}