Subject: v19i057: Flex, a fast LEX replacement, Part03/07
Newsgroups: comp.sources.unix
Sender: sources
Approved: rsalz@uunet.UU.NET

Submitted-by: Vern Paxson
Posting-number: Volume 19, Issue 57
Archive-name: flex2/part03

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file".
if test -f 'flex/main.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'flex/main.c'\"
else
echo shar: Extracting \"'flex/main.c'\" \(16556 characters\)
sed "s/^X//" >'flex/main.c' <<'END_OF_FILE'
X/* flex - tool to generate fast lexical analyzers
X *
X *
X * Copyright (c) 1989 The Regents of the University of California.
X * All rights reserved.
X *
X * This code is derived from software contributed to Berkeley by
X * Vern Paxson.
X *
X * The United States Government has rights in this work pursuant to
X * contract no. DE-AC03-76SF00098 between the United States Department of
X * Energy and the University of California.
X *
X * Redistribution and use in source and binary forms are permitted
X * provided that the above copyright notice and this paragraph are
X * duplicated in all such forms and that any documentation,
X * advertising materials, and other materials related to such
X * distribution and use acknowledge that the software was developed
X * by the University of California, Berkeley.  The name of the
X * University may not be used to endorse or promote products derived
X * from this software without specific prior written permission.
X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
X * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
X * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
X * X */ X X#ifndef lint X Xstatic char copyright[] = X "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; Xstatic char CR_continuation[] = "@(#) All rights reserved.\n"; X Xstatic char rcsid[] = X "@(#) $Header: main.c,v 2.2 89/06/20 16:36:26 vern Exp $ (LBL)"; X X#endif X X X#include "flexdef.h" X Xstatic char flex_version[] = "2.1 (beta)"; X X X/* these globals are all defined and commented in flexdef.h */ Xint printstats, syntaxerror, eofseen, ddebug, trace, spprdflt; Xint interactive, caseins, useecs, fulltbl, usemecs; Xint fullspd, gen_line_dirs, performance_report, backtrack_report; Xint yymore_used, reject, real_reject, continued_action; Xint yymore_really_used, reject_really_used; Xint datapos, dataline, linenum; XFILE *skelfile = NULL; Xchar *infilename = NULL; Xint onestate[ONE_STACK_SIZE], onesym[ONE_STACK_SIZE]; Xint onenext[ONE_STACK_SIZE], onedef[ONE_STACK_SIZE], onesp; Xint current_mns, num_rules, current_max_rules, lastnfa; Xint *firstst, *lastst, *finalst, *transchar, *trans1, *trans2; Xint *accptnum, *assoc_rule, *state_type, *rule_type, *rule_linenum; Xint current_state_type; Xint variable_trailing_context_rules; Xint numtemps, numprots, protprev[MSP], protnext[MSP], prottbl[MSP]; Xint protcomst[MSP], firstprot, lastprot, protsave[PROT_SAVE_SIZE]; Xint numecs, nextecm[CSIZE + 1], ecgroup[CSIZE + 1], nummecs, tecfwd[CSIZE + 1]; Xint tecbck[CSIZE + 1]; Xint lastsc, current_max_scs, *scset, *scbol, *scxclu, *sceof, *actvsc; Xchar **scname; Xint current_max_dfa_size, current_max_xpairs; Xint current_max_template_xpairs, current_max_dfas; Xint lastdfa, *nxt, *chk, *tnxt; Xint *base, *def, tblend, firstfree, **dss, *dfasiz; Xunion dfaacc_union *dfaacc; Xint *accsiz, *dhash, numas; Xint numsnpairs, jambase, jamstate; Xint lastccl, current_maxccls, *cclmap, *ccllen, *cclng, cclreuse; Xint current_max_ccl_tbl_size; Xchar *ccltbl; Xchar *starttime, *endtime, nmstr[MAXLINE]; Xint sectnum, nummt, hshcol, dfaeql, numeps, eps2, 
num_reallocs; Xint tmpuses, totnst, peakpairs, numuniq, numdup, hshsave; Xint num_backtracking, bol_needed; XFILE *temp_action_file; XFILE *backtrack_file; Xint end_of_buffer_state; X#ifndef SHORT_FILE_NAMES Xchar action_file_name[] = "/tmp/flexXXXXXX"; X#else Xchar action_file_name[] = "flexXXXXXX.tmp"; X#endif X X#ifndef SHORT_FILE_NAMES Xstatic char outfile[] = "lex.yy.c"; X#else Xstatic char outfile[] = "lexyy.c"; X#endif Xstatic int outfile_created = 0; X X X/* flex - main program X * X * synopsis (from the shell) X * flex [-v] [file ...] X */ X Xmain( argc, argv ) Xint argc; Xchar **argv; X X { X flexinit( argc, argv ); X X readin(); X X if ( syntaxerror ) X flexend( 1 ); X X if ( yymore_really_used == REALLY_USED ) X yymore_used = true; X else if ( yymore_really_used == REALLY_NOT_USED ) X yymore_used = false; X X if ( reject_really_used == REALLY_USED ) X reject = true; X else if ( reject_really_used == REALLY_NOT_USED ) X reject = false; X X if ( performance_report ) X { X if ( yymore_used ) X fprintf( stderr, X "yymore() entails a minor performance penalty\n" ); X X if ( interactive ) X fprintf( stderr, X "-I (interactive) entails a minor performance penalty\n" ); X X if ( reject ) X fprintf( stderr, X "REJECT entails a large performance penalty\n" ); X X if ( variable_trailing_context_rules ) X fprintf( stderr, X"Variable trailing context rules entail a large performance penalty\n" ); X } X X if ( reject ) X real_reject = true; X X if ( variable_trailing_context_rules ) X reject = true; X X if ( (fulltbl || fullspd) && reject ) X { X if ( real_reject ) X flexerror( "REJECT cannot be used with -f or -F" ); X else X flexerror( X "variable trailing context rules cannot be used with -f or -F" ); X } X X /* convert the ndfa to a dfa */ X ntod(); X X /* generate the C state transition tables from the DFA */ X make_tables(); X X /* note, flexend does not return. It exits with its argument as status. 
*/ X X flexend( 0 ); X X /*NOTREACHED*/ X } X X X/* flexend - terminate flex X * X * synopsis X * int status; X * flexend( status ); X * X * status is exit status. X * X * note X * This routine does not return. X */ X Xflexend( status ) Xint status; X X { X int tblsiz; X char *flex_gettime(); X X if ( skelfile != NULL ) X (void) fclose( skelfile ); X X if ( temp_action_file ) X { X (void) fclose( temp_action_file ); X (void) unlink( action_file_name ); X } X X if ( status != 0 && outfile_created ) X { X (void) fclose( stdout ); X (void) unlink( outfile ); X } X X if ( backtrack_report ) X { X if ( num_backtracking == 0 ) X fprintf( backtrack_file, "No backtracking.\n" ); X else if ( fullspd || fulltbl ) X fprintf( backtrack_file, X "%d backtracking (non-accepting) states.\n", X num_backtracking ); X else X fprintf( backtrack_file, "Compressed tables always backtrack.\n" ); X X (void) fclose( backtrack_file ); X } X X if ( printstats ) X { X endtime = flex_gettime(); X X fprintf( stderr, "flex version %s usage statistics:\n", flex_version ); X fprintf( stderr, " started at %s, finished at %s\n", X starttime, endtime ); X X fprintf( stderr, " %d/%d NFA states\n", lastnfa, current_mns ); X fprintf( stderr, " %d/%d DFA states (%d words)\n", lastdfa, X current_max_dfas, totnst ); X fprintf( stderr, " %d rules\n", num_rules - 1 /* - 1 for def. 
rule */ ); X X if ( num_backtracking == 0 ) X fprintf( stderr, " No backtracking\n" ); X else if ( fullspd || fulltbl ) X fprintf( stderr, " %d backtracking (non-accepting) states\n", X num_backtracking ); X else X fprintf( stderr, " compressed tables always backtrack\n" ); X X if ( bol_needed ) X fprintf( stderr, " Beginning-of-line patterns used\n" ); X X fprintf( stderr, " %d/%d start conditions\n", lastsc, X current_max_scs ); X fprintf( stderr, " %d epsilon states, %d double epsilon states\n", X numeps, eps2 ); X X if ( lastccl == 0 ) X fprintf( stderr, " no character classes\n" ); X else X fprintf( stderr, X " %d/%d character classes needed %d/%d words of storage, %d reused\n", X lastccl, current_maxccls, X cclmap[lastccl] + ccllen[lastccl], X current_max_ccl_tbl_size, cclreuse ); X X fprintf( stderr, " %d state/nextstate pairs created\n", numsnpairs ); X fprintf( stderr, " %d/%d unique/duplicate transitions\n", X numuniq, numdup ); X X if ( fulltbl ) X { X tblsiz = lastdfa * numecs; X fprintf( stderr, " %d table entries\n", tblsiz ); X } X X else X { X tblsiz = 2 * (lastdfa + numtemps) + 2 * tblend; X X fprintf( stderr, " %d/%d base-def entries created\n", X lastdfa + numtemps, current_max_dfas ); X fprintf( stderr, " %d/%d (peak %d) nxt-chk entries created\n", X tblend, current_max_xpairs, peakpairs ); X fprintf( stderr, X " %d/%d (peak %d) template nxt-chk entries created\n", X numtemps * nummecs, current_max_template_xpairs, X numtemps * numecs ); X fprintf( stderr, " %d empty table entries\n", nummt ); X fprintf( stderr, " %d protos created\n", numprots ); X fprintf( stderr, " %d templates created, %d uses\n", X numtemps, tmpuses ); X } X X if ( useecs ) X { X tblsiz = tblsiz + CSIZE; X fprintf( stderr, " %d/%d equivalence classes created\n", X numecs, CSIZE ); X } X X if ( usemecs ) X { X tblsiz = tblsiz + numecs; X fprintf( stderr, " %d/%d meta-equivalence classes created\n", X nummecs, CSIZE ); X } X X fprintf( stderr, " %d (%d saved) hash collisions, 
%d DFAs equal\n", X hshcol, hshsave, dfaeql ); X fprintf( stderr, " %d sets of reallocations needed\n", num_reallocs ); X fprintf( stderr, " %d total table entries needed\n", tblsiz ); X } X X#ifndef VMS X exit( status ); X#else X exit( status + 1 ); X#endif X } X X X/* flexinit - initialize flex X * X * synopsis X * int argc; X * char **argv; X * flexinit( argc, argv ); X */ X Xflexinit( argc, argv ) Xint argc; Xchar **argv; X X { X int i, sawcmpflag, use_stdout; X char *arg, *skelname = NULL, *flex_gettime(), clower(), *mktemp(); X X printstats = syntaxerror = trace = spprdflt = interactive = caseins = false; X backtrack_report = performance_report = ddebug = fulltbl = fullspd = false; X yymore_used = continued_action = reject = false; X yymore_really_used = reject_really_used = false; X gen_line_dirs = usemecs = useecs = true; X X sawcmpflag = false; X use_stdout = false; X X /* read flags */ X for ( --argc, ++argv; argc ; --argc, ++argv ) X { X if ( argv[0][0] != '-' || argv[0][1] == '\0' ) X break; X X arg = argv[0]; X X for ( i = 1; arg[i] != '\0'; ++i ) X switch ( arg[i] ) X { X case 'b': X backtrack_report = true; X break; X X case 'c': X if ( i != 1 ) X flexerror( "-c flag must be given separately" ); X X if ( ! 
sawcmpflag ) X { X useecs = false; X usemecs = false; X fulltbl = false; X sawcmpflag = true; X } X X for ( ++i; arg[i] != '\0'; ++i ) X switch ( clower( arg[i] ) ) X { X case 'e': X useecs = true; X break; X X case 'F': X fullspd = true; X break; X X case 'f': X fulltbl = true; X break; X X case 'm': X usemecs = true; X break; X X default: X lerrif( "unknown -c option %c", X (int) arg[i] ); X break; X } X X goto get_next_arg; X X case 'd': X ddebug = true; X break; X X case 'f': X useecs = usemecs = false; X fulltbl = true; X break; X X case 'F': X useecs = usemecs = false; X fullspd = true; X break; X X case 'I': X interactive = true; X break; X X case 'i': X caseins = true; X break; X X case 'L': X gen_line_dirs = false; X break; X X case 'p': X performance_report = true; X break; X X case 'S': X if ( i != 1 ) X flexerror( "-S flag must be given separately" ); X X skelname = arg + i + 1; X goto get_next_arg; X X case 's': X spprdflt = true; X break; X X case 't': X use_stdout = true; X break; X X case 'T': X trace = true; X break; X X case 'v': X printstats = true; X break; X X default: X lerrif( "unknown flag %c", (int) arg[i] ); X break; X } X Xget_next_arg: /* used by -c and -S flags in lieu of a "continue 2" control */ X ; X } X X if ( (fulltbl || fullspd) && usemecs ) X flexerror( "full table and -cm don't make sense together" ); X X if ( (fulltbl || fullspd) && interactive ) X flexerror( "full table and -I are (currently) incompatible" ); X X if ( fulltbl && fullspd ) X flexerror( "full table and -F are mutually exclusive" ); X X if ( ! skelname ) X { X static char skeleton_name_storage[400]; X X skelname = skeleton_name_storage; X (void) strcpy( skelname, DEFAULT_SKELETON_FILE ); X } X X if ( ! 
use_stdout ) X { X FILE *prev_stdout = freopen( outfile, "w", stdout ); X X if ( prev_stdout == NULL ) X flexerror( "could not create lex.yy.c" ); X X outfile_created = 1; X } X X if ( argc ) X { X if ( argc > 1 ) X flexerror( "extraneous argument(s) given" ); X X yyin = fopen( infilename = argv[0], "r" ); X X if ( yyin == NULL ) X lerrsf( "can't open %s", argv[0] ); X } X X else X yyin = stdin; X X if ( backtrack_report ) X { X#ifndef SHORT_FILE_NAMES X backtrack_file = fopen( "lex.backtrack", "w" ); X#else X backtrack_file = fopen( "lex.bck", "w" ); X#endif X X if ( backtrack_file == NULL ) X flexerror( "could not create lex.backtrack" ); X } X X else X backtrack_file = NULL; X X X lastccl = 0; X lastsc = 0; X X /* initialize the statistics */ X starttime = flex_gettime(); X X if ( (skelfile = fopen( skelname, "r" )) == NULL ) X lerrsf( "can't open skeleton file %s", skelname ); X X (void) mktemp( action_file_name ); X X if ( (temp_action_file = fopen( action_file_name, "w" )) == NULL ) X lerrsf( "can't open temporary action file %s", action_file_name ); X X lastdfa = lastnfa = num_rules = numas = numsnpairs = tmpuses = 0; X numecs = numeps = eps2 = num_reallocs = hshcol = dfaeql = totnst = 0; X numuniq = numdup = hshsave = eofseen = datapos = dataline = 0; X num_backtracking = onesp = numprots = 0; X variable_trailing_context_rules = bol_needed = false; X X linenum = sectnum = 1; X firstprot = NIL; X X /* used in mkprot() so that the first proto goes in slot 1 X * of the proto queue X */ X lastprot = 1; X X if ( useecs ) X { X /* set up doubly-linked equivalence classes */ X ecgroup[1] = NIL; X X for ( i = 2; i <= CSIZE; ++i ) X { X ecgroup[i] = i - 1; X nextecm[i - 1] = i; X } X X nextecm[CSIZE] = NIL; X } X X else X { /* put everything in its own equivalence class */ X for ( i = 1; i <= CSIZE; ++i ) X { X ecgroup[i] = i; X nextecm[i] = BAD_SUBSCRIPT; /* to catch errors */ X } X } X X set_up_initial_allocations(); X } X X X/* readin - read in the rules section 
of the input file(s)
X *
X * synopsis
X *    readin();
X *
X * Writes the FLEX_DEBUG and table-style #defines to stdout, copies out
X * the skeleton with skelout(), emits a line directive and runs yyparse().
X * Afterwards numecs is set either from cre8ecs() (when equivalence
X * classes are in use) or to CSIZE.
X */
X
Xreadin()
X
X    {
X    if ( ddebug )
X        puts( "#define FLEX_DEBUG" );
X
X    if ( fulltbl )
X        puts( "#define FLEX_FULL_TABLE" );
X    else if ( fullspd )
X        puts( "#define FLEX_FAST_COMPRESSED" );
X    else
X        puts( "#define FLEX_COMPRESSED" );
X
X    skelout();
X
X    line_directive_out( stdout );
X
X    if ( yyparse() )
X        lerrif( "fatal parse error at line %d", linenum );
X
X    if ( useecs )
X        {
X        numecs = cre8ecs( nextecm, ecgroup, CSIZE );
X        ccl2ecl();
X        }
X
X    else
X        numecs = CSIZE;    /* every character is its own class */
X
X    }
X
X
X
X/* set_up_initial_allocations - allocate memory for internal tables
X *
X * Each group below first records the initial capacity in the matching
X * current_* global and then allocates every array that is sized by it:
X * NFA states, rules, start conditions, character classes, the character
X * class table, transition pairs, template pairs and DFA states.
X */
X
Xset_up_initial_allocations()
X
X    {
X    current_mns = INITIAL_MNS;
X    firstst = allocate_integer_array( current_mns );
X    lastst = allocate_integer_array( current_mns );
X    finalst = allocate_integer_array( current_mns );
X    transchar = allocate_integer_array( current_mns );
X    trans1 = allocate_integer_array( current_mns );
X    trans2 = allocate_integer_array( current_mns );
X    accptnum = allocate_integer_array( current_mns );
X    assoc_rule = allocate_integer_array( current_mns );
X    state_type = allocate_integer_array( current_mns );
X
X    current_max_rules = INITIAL_MAX_RULES;
X    rule_type = allocate_integer_array( current_max_rules );
X    rule_linenum = allocate_integer_array( current_max_rules );
X
X    current_max_scs = INITIAL_MAX_SCS;
X    scset = allocate_integer_array( current_max_scs );
X    scbol = allocate_integer_array( current_max_scs );
X    scxclu = allocate_integer_array( current_max_scs );
X    sceof = allocate_integer_array( current_max_scs );
X    scname = allocate_char_ptr_array( current_max_scs );
X    actvsc = allocate_integer_array( current_max_scs );
X
X    current_maxccls = INITIAL_MAX_CCLS;
X    cclmap = allocate_integer_array( current_maxccls );
X    ccllen = allocate_integer_array( current_maxccls );
X    cclng = allocate_integer_array( current_maxccls );
X
X    current_max_ccl_tbl_size = INITIAL_MAX_CCL_TBL_SIZE;
X    ccltbl = allocate_character_array( current_max_ccl_tbl_size );
X
X    current_max_dfa_size = INITIAL_MAX_DFA_SIZE;    /* nothing is allocated from this here */
X
X    current_max_xpairs = INITIAL_MAX_XPAIRS;
X    nxt = allocate_integer_array( current_max_xpairs );
X    chk = allocate_integer_array( current_max_xpairs );
X
X    current_max_template_xpairs = INITIAL_MAX_TEMPLATE_XPAIRS;
X    tnxt = allocate_integer_array( current_max_template_xpairs );
X
X    current_max_dfas = INITIAL_MAX_DFAS;
X    base = allocate_integer_array( current_max_dfas );
X    def = allocate_integer_array( current_max_dfas );
X    dfasiz = allocate_integer_array( current_max_dfas );
X    accsiz = allocate_integer_array( current_max_dfas );
X    dhash = allocate_integer_array( current_max_dfas );
X    dss = allocate_int_ptr_array( current_max_dfas );
X    dfaacc = allocate_dfaacc_union( current_max_dfas );
X    }
END_OF_FILE
if test 16556 -ne `wc -c <'flex/main.c'`; then
    echo shar: \"'flex/main.c'\" unpacked with wrong size!
fi
# end of 'flex/main.c'
fi
if test -f 'flex/nfa.c' -a "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'flex/nfa.c'\"
else
echo shar: Extracting \"'flex/nfa.c'\" \(17293 characters\)
sed "s/^X//" >'flex/nfa.c' <<'END_OF_FILE'
X/* nfa - NFA construction routines */
X
X/*
X * Copyright (c) 1989 The Regents of the University of California.
X * All rights reserved.
X *
X * This code is derived from software contributed to Berkeley by
X * Vern Paxson.
X *
X * The United States Government has rights in this work pursuant to
X * contract no. DE-AC03-76SF00098 between the United States Department of
X * Energy and the University of California.
X *
X * Redistribution and use in source and binary forms are permitted
X * provided that the above copyright notice and this paragraph are
X * duplicated in all such forms and that any documentation,
X * advertising materials, and other materials related to such
X * distribution and use acknowledge that the software was developed
X * by the University of California, Berkeley. 
The name of the X * University may not be used to endorse or promote products derived X * from this software without specific prior written permission. X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR X * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED X * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. X */ X X#ifndef lint X Xstatic char copyright[] = X "@(#) Copyright (c) 1989 The Regents of the University of California.\n"; Xstatic char CR_continuation[] = "@(#) All rights reserved.\n"; X Xstatic char rcsid[] = X "@(#) $Header: nfa.c,v 2.0 89/06/20 15:50:05 vern Locked $ (LBL)"; X X#endif X X#include "flexdef.h" X X/* add_accept - add an accepting state to a machine X * X * synopsis X * X * add_accept( mach, accepting_number ); X * X * accepting_number becomes mach's accepting number. X */ X Xadd_accept( mach, accepting_number ) Xint mach; X X { X /* hang the accepting number off an epsilon state. if it is associated X * with a state that has a non-epsilon out-transition, then the state X * will accept BEFORE it makes that transition, i.e., one character X * too soon X */ X X if ( transchar[finalst[mach]] == SYM_EPSILON ) X accptnum[finalst[mach]] = accepting_number; X X else X { X int astate = mkstate( SYM_EPSILON ); X accptnum[astate] = accepting_number; X mach = link_machines( mach, astate ); X } X } X X X/* copysingl - make a given number of copies of a singleton machine X * X * synopsis X * X * newsng = copysingl( singl, num ); X * X * newsng - a new singleton composed of num copies of singl X * singl - a singleton machine X * num - the number of copies of singl to be present in newsng X */ X Xint copysingl( singl, num ) Xint singl, num; X X { X int copy, i; X X copy = mkstate( SYM_EPSILON ); X X for ( i = 1; i <= num; ++i ) X copy = link_machines( copy, dupmachine( singl ) ); X X return ( copy ); X } X X X/* dumpnfa - debugging routine to write out an nfa X * X * synopsis X * int state1; X * dumpnfa( 
state1 );
X *
X * Writes one line per NFA state to stderr: the state number, its
X * transition symbol, its two out-transitions and, when set, its
X * accepting number.  state1 is only echoed in the banner; the dump
X * always covers states 1 through lastnfa.
X */
X
Xdumpnfa( state1 )
Xint state1;
X
X    {
X    int sym, tsp1, tsp2, anum, ns;
X
X    fprintf( stderr, "\n\n********** beginning dump of nfa with start state %d\n",
X             state1 );
X
X    /* we probably should loop starting at firstst[state1] and going to
X     * lastst[state1], but they're not maintained properly when we "or"
X     * all of the rules together.  So we use our knowledge that the machine
X     * starts at state 1 and ends at lastnfa.
X     */
X
X    /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
X    for ( ns = 1; ns <= lastnfa; ++ns )
X        {
X        fprintf( stderr, "state # %4d\t", ns );
X
X        sym = transchar[ns];
X        tsp1 = trans1[ns];
X        tsp2 = trans2[ns];
X        anum = accptnum[ns];
X
X        fprintf( stderr, "%3d: %4d, %4d", sym, tsp1, tsp2 );
X
X        if ( anum != NIL )    /* only states with an accepting number show it */
X            fprintf( stderr, " [%d]", anum );
X
X        fprintf( stderr, "\n" );
X        }
X
X    fprintf( stderr, "********** end of dump\n" );
X    }
X
X
X/* dupmachine - make a duplicate of a given machine
X *
X * synopsis
X *
X *   copy = dupmachine( mach );
X *
X *     copy - holds duplicate of mach
X *     mach - machine to be duplicated
X *
X * note that the copy of mach is NOT an exact duplicate; rather, all the
X * transition state values are adjusted so that the copy is self-contained,
X * as the original should have been. 
X *
X * also note that the original MUST be contiguous, with its low and high
X * states accessible by the arrays firstst and lastst
X *
X * Every state from firstst[mach] through lastst[mach] is re-created with
X * mkstate(), so the copy occupies a fresh run of state numbers and each
X * transition target is shifted by the distance between a state and its
X * copy.  Calls flexfatal() if the range is empty.
X */
X
Xint dupmachine( mach )
Xint mach;
X
X    {
X    int i, init, state_offset;
X    int state = 0;    /* stays 0 only if the loop never runs */
X    int last = lastst[mach];
X
X    for ( i = firstst[mach]; i <= last; ++i )
X        {
X        state = mkstate( transchar[i] );
X
X        if ( trans1[i] != NO_TRANSITION )
X            {
X            mkxtion( finalst[state], trans1[i] + state - i );    /* state - i is the copy's displacement */
X
X            if ( transchar[i] == SYM_EPSILON && trans2[i] != NO_TRANSITION )
X                mkxtion( finalst[state], trans2[i] + state - i );
X            }
X
X        accptnum[state] = accptnum[i];
X        }
X
X    if ( state == 0 )
X        flexfatal( "empty machine in dupmachine()" );
X
X    state_offset = state - i + 1;    /* i is last + 1 here, so this is state - last */
X
X    init = mach + state_offset;
X    firstst[init] = firstst[mach] + state_offset;
X    finalst[init] = finalst[mach] + state_offset;
X    lastst[init] = lastst[mach] + state_offset;
X
X    return ( init );
X    }
X
X/* finish_rule - finish up the processing for a rule
X *
X * synopsis
X *
X *   finish_rule( mach, variable_trail_rule, headcnt, trailcnt );
X *
X * An accepting number is added to the given machine.  If variable_trail_rule
X * is true then the rule has trailing context and both the head and trail
X * are variable size.  Otherwise if headcnt or trailcnt is non-zero then
X * the machine recognizes a pattern with trailing context and headcnt is
X * the number of characters in the matched part of the pattern, or zero
X * if the matched part has variable length.  trailcnt is the number of
X * trailing context characters in the pattern, or zero if the trailing
X * context has variable length. 
X */ X Xfinish_rule( mach, variable_trail_rule, headcnt, trailcnt ) Xint mach, variable_trail_rule, headcnt, trailcnt; X X { X add_accept( mach, num_rules ); X X /* we did this in new_rule(), but it often gets the wrong X * number because we do it before we start parsing the current rule X */ X rule_linenum[num_rules] = linenum; X X fprintf( temp_action_file, "case %d:\n", num_rules ); X X if ( variable_trail_rule ) X { X rule_type[num_rules] = RULE_VARIABLE; X X if ( performance_report ) X fprintf( stderr, "Variable trailing context rule at line %d\n", X rule_linenum[num_rules] ); X X variable_trailing_context_rules = true; X } X X else X { X rule_type[num_rules] = RULE_NORMAL; X X if ( headcnt > 0 || trailcnt > 0 ) X { X /* do trailing context magic to not match the trailing characters */ X char *scanner_cp = "yy_c_buf_p = yy_cp"; X char *scanner_bp = "yy_bp"; X X fprintf( temp_action_file, X "*yy_cp = yy_hold_char; /* undo effects of setting up yytext */\n" ); X X if ( headcnt > 0 ) X { X if ( headcnt > 0 ) X fprintf( temp_action_file, "%s = %s + %d;\n", X scanner_cp, scanner_bp, headcnt ); X X else X fprintf( temp_action_file, "%s = %s;\n", X scanner_cp, scanner_bp ); X } X X else X fprintf( temp_action_file, X "%s -= %d;\n", scanner_cp, trailcnt ); X X fprintf( temp_action_file, X "YY_DO_BEFORE_ACTION; /* set up yytext again */\n" ); X } X } X X line_directive_out( temp_action_file ); X } X X X/* link_machines - connect two machines together X * X * synopsis X * X * new = link_machines( first, last ); X * X * new - a machine constructed by connecting first to last X * first - the machine whose successor is to be last X * last - the machine whose predecessor is to be first X * X * note: this routine concatenates the machine first with the machine X * last to produce a machine new which will pattern-match first first X * and then last, and will fail if either of the sub-patterns fails. X * FIRST is set to new by the operation. last is unmolested. 
X */ X Xint link_machines( first, last ) Xint first, last; X X { X if ( first == NIL ) X return ( last ); X X else if ( last == NIL ) X return ( first ); X X else X { X mkxtion( finalst[first], last ); X finalst[first] = finalst[last]; X lastst[first] = max( lastst[first], lastst[last] ); X firstst[first] = min( firstst[first], firstst[last] ); X X return ( first ); X } X } X X X/* mark_beginning_as_normal - mark each "beginning" state in a machine X * as being a "normal" (i.e., not trailing context- X * associated) states X * X * synopsis X * X * mark_beginning_as_normal( mach ) X * X * mach - machine to mark X * X * The "beginning" states are the epsilon closure of the first state X */ X Xmark_beginning_as_normal( mach ) Xregister int mach; X X { X switch ( state_type[mach] ) X { X case STATE_NORMAL: X /* oh, we've already visited here */ X return; X X case STATE_TRAILING_CONTEXT: X state_type[mach] = STATE_NORMAL; X X if ( transchar[mach] == SYM_EPSILON ) X { X if ( trans1[mach] != NO_TRANSITION ) X mark_beginning_as_normal( trans1[mach] ); X X if ( trans2[mach] != NO_TRANSITION ) X mark_beginning_as_normal( trans2[mach] ); X } X break; X X default: X flexerror( "bad state type in mark_beginning_as_normal()" ); X break; X } X } X X X/* mkbranch - make a machine that branches to two machines X * X * synopsis X * X * branch = mkbranch( first, second ); X * X * branch - a machine which matches either first's pattern or second's X * first, second - machines whose patterns are to be or'ed (the | operator) X * X * note that first and second are NEITHER destroyed by the operation. Also, X * the resulting machine CANNOT be used with any other "mk" operation except X * more mkbranch's. 
Compare with mkor() X */ X Xint mkbranch( first, second ) Xint first, second; X X { X int eps; X X if ( first == NO_TRANSITION ) X return ( second ); X X else if ( second == NO_TRANSITION ) X return ( first ); X X eps = mkstate( SYM_EPSILON ); X X mkxtion( eps, first ); X mkxtion( eps, second ); X X return ( eps ); X } X X X/* mkclos - convert a machine into a closure X * X * synopsis X * new = mkclos( state ); X * X * new - a new state which matches the closure of "state" X */ X Xint mkclos( state ) Xint state; X X { X return ( mkopt( mkposcl( state ) ) ); X } X X X/* mkopt - make a machine optional X * X * synopsis X * X * new = mkopt( mach ); X * X * new - a machine which optionally matches whatever mach matched X * mach - the machine to make optional X * X * notes: X * 1. mach must be the last machine created X * 2. mach is destroyed by the call X */ X Xint mkopt( mach ) Xint mach; X X { X int eps; X X if ( ! SUPER_FREE_EPSILON(finalst[mach]) ) X { X eps = mkstate( SYM_EPSILON ); X mach = link_machines( mach, eps ); X } X X /* can't skimp on the following if FREE_EPSILON(mach) is true because X * some state interior to "mach" might point back to the beginning X * for a closure X */ X eps = mkstate( SYM_EPSILON ); X mach = link_machines( eps, mach ); X X mkxtion( mach, finalst[mach] ); X X return ( mach ); X } X X X/* mkor - make a machine that matches either one of two machines X * X * synopsis X * X * new = mkor( first, second ); X * X * new - a machine which matches either first's pattern or second's X * first, second - machines whose patterns are to be or'ed (the | operator) X * X * note that first and second are both destroyed by the operation X * the code is rather convoluted because an attempt is made to minimize X * the number of epsilon states needed X */ X Xint mkor( first, second ) Xint first, second; X X { X int eps, orend; X X if ( first == NIL ) X return ( second ); X X else if ( second == NIL ) X return ( first ); X X else X { X /* see comment in 
mkopt() about why we can't use the first state
X         * of "first" or "second" if they satisfy "FREE_EPSILON"
X         *
X         * The new epsilon start state branches to both alternatives.
X         * "orend" becomes the common final state: an existing final
X         * state of either alternative is reused when it satisfies
X         * SUPER_FREE_EPSILON and carries no accepting number; otherwise
X         * a new epsilon state is appended.
X         */
X        eps = mkstate( SYM_EPSILON );
X
X        first = link_machines( eps, first );
X
X        mkxtion( first, second );
X
X        if ( SUPER_FREE_EPSILON(finalst[first]) &&
X             accptnum[finalst[first]] == NIL )
X            {
X            orend = finalst[first];
X            mkxtion( finalst[second], orend );
X            }
X
X        else if ( SUPER_FREE_EPSILON(finalst[second]) &&
X                  accptnum[finalst[second]] == NIL )
X            {
X            orend = finalst[second];
X            mkxtion( finalst[first], orend );
X            }
X
X        else
X            {
X            eps = mkstate( SYM_EPSILON );
X
X            first = link_machines( first, eps );
X            orend = finalst[first];
X
X            mkxtion( finalst[second], orend );
X            }
X        }
X
X    finalst[first] = orend;
X    return ( first );
X    }
X
X
X/* mkposcl - convert a machine into a positive closure
X *
X * synopsis
X *   new = mkposcl( state );
X *
X *    new - a machine matching the positive closure of "state"
X *
X * The loop back to the start is added on the existing final state when
X * it satisfies SUPER_FREE_EPSILON, otherwise on a new epsilon state that
X * is linked onto the end of the machine.
X */
X
Xint mkposcl( state )
Xint state;
X
X    {
X    int eps;
X
X    if ( SUPER_FREE_EPSILON(finalst[state]) )
X        {
X        mkxtion( finalst[state], state );
X        return ( state );
X        }
X
X    else
X        {
X        eps = mkstate( SYM_EPSILON );
X        mkxtion( eps, state );
X        return ( link_machines( state, eps ) );
X        }
X    }
X
X
X/* mkrep - make a replicated machine
X *
X * synopsis
X *   new = mkrep( mach, lb, ub );
X *
X *    new - a machine that matches whatever "mach" matched from "lb"
X *          number of times to "ub" number of times
X *
X * note
X *   if "ub" is INFINITY then "new" matches "lb" or more occurrences of "mach"
X */
X
Xint mkrep( mach, lb, ub )
Xint mach, lb, ub;
X
X    {
X    int base_mach, tail, copy, i;
X
X    base_mach = copysingl( mach, lb - 1 );    /* mach itself supplies the remaining copy */
X
X    if ( ub == INFINITY )
X        {
X        copy = dupmachine( mach );
X        mach = link_machines( mach,
X                              link_machines( base_mach, mkclos( copy ) ) );
X        }
X
X    else
X        {
X        tail = mkstate( SYM_EPSILON );
X
X        for ( i = lb; i < ub; ++i )    /* one nested optional copy per repetition above lb */
X            {
X            copy = dupmachine( mach );
X            tail = mkopt( link_machines( copy, tail ) );
X            }
X
X        mach = link_machines( mach, link_machines( base_mach, tail ) ); 
X } X X return ( mach ); X } X X X/* mkstate - create a state with a transition on a given symbol X * X * synopsis X * X * state = mkstate( sym ); X * X * state - a new state matching sym X * sym - the symbol the new state is to have an out-transition on X * X * note that this routine makes new states in ascending order through the X * state array (and increments LASTNFA accordingly). The routine DUPMACHINE X * relies on machines being made in ascending order and that they are X * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge X * that it admittedly is) X */ X Xint mkstate( sym ) Xint sym; X X { X if ( ++lastnfa >= current_mns ) X { X if ( (current_mns += MNS_INCREMENT) >= MAXIMUM_MNS ) X lerrif( "input rules are too complicated (>= %d NFA states)", X current_mns ); X X ++num_reallocs; X X firstst = reallocate_integer_array( firstst, current_mns ); X lastst = reallocate_integer_array( lastst, current_mns ); X finalst = reallocate_integer_array( finalst, current_mns ); X transchar = reallocate_integer_array( transchar, current_mns ); X trans1 = reallocate_integer_array( trans1, current_mns ); X trans2 = reallocate_integer_array( trans2, current_mns ); X accptnum = reallocate_integer_array( accptnum, current_mns ); X assoc_rule = reallocate_integer_array( assoc_rule, current_mns ); X state_type = reallocate_integer_array( state_type, current_mns ); X } X X firstst[lastnfa] = lastnfa; X finalst[lastnfa] = lastnfa; X lastst[lastnfa] = lastnfa; X transchar[lastnfa] = sym; X trans1[lastnfa] = NO_TRANSITION; X trans2[lastnfa] = NO_TRANSITION; X accptnum[lastnfa] = NIL; X assoc_rule[lastnfa] = num_rules; X state_type[lastnfa] = current_state_type; X X /* fix up equivalence classes base on this transition. Note that any X * character which has its own transition gets its own equivalence class. X * Thus only characters which are only in character classes have a chance X * at being in the same equivalence class. E.g. 
"a|b" puts 'a' and 'b' X * into two different equivalence classes. "[ab]" puts them in the same X * equivalence class (barring other differences elsewhere in the input). X */ X X if ( sym < 0 ) X { X /* we don't have to update the equivalence classes since that was X * already done when the ccl was created for the first time X */ X } X X else if ( sym == SYM_EPSILON ) X ++numeps; X X else X { X if ( useecs ) X mkechar( sym, nextecm, ecgroup ); X } X X return ( lastnfa ); X } X X X/* mkxtion - make a transition from one state to another X * X * synopsis X * X * mkxtion( statefrom, stateto ); X * X * statefrom - the state from which the transition is to be made X * stateto - the state to which the transition is to be made X */ X Xmkxtion( statefrom, stateto ) Xint statefrom, stateto; X X { X if ( trans1[statefrom] == NO_TRANSITION ) X trans1[statefrom] = stateto; X X else if ( (transchar[statefrom] != SYM_EPSILON) || X (trans2[statefrom] != NO_TRANSITION) ) X flexfatal( "found too many transitions in mkxtion()" ); X X else X { /* second out-transition for an epsilon state */ X ++eps2; X trans2[statefrom] = stateto; X } X } X X/* new_rule - initialize for a new rule X * X * synopsis X * X * new_rule(); X * X * the global num_rules is incremented and the any corresponding dynamic X * arrays (such as rule_type[]) are grown as needed. X */ X Xnew_rule() X X { X if ( ++num_rules >= current_max_rules ) X { X ++num_reallocs; X current_max_rules += MAX_RULES_INCREMENT; X rule_type = reallocate_integer_array( rule_type, current_max_rules ); X rule_linenum = X reallocate_integer_array( rule_linenum, current_max_rules ); X } X X if ( num_rules > MAX_RULE ) X lerrif( "too many rules (> %d)!", MAX_RULE ); X X rule_linenum[num_rules] = linenum; X } END_OF_FILE if test 17293 -ne `wc -c <'flex/nfa.c'`; then echo shar: \"'flex/nfa.c'\" unpacked with wrong size! fi # end of 'flex/nfa.c' fi echo shar: End of archive 3 \(of 7\). 
cp /dev/null ark3isdone MISSING="" for I in 1 2 3 4 5 6 7 ; do if test ! -f ark${I}isdone ; then MISSING="${MISSING} ${I}" fi done if test "${MISSING}" = "" ; then echo You have unpacked all 7 archives. rm -f ark[1-9]isdone else echo You still need to unpack the following archives: echo " " ${MISSING} fi ## End of shell archive. exit 0