#!/usr/bin/perl -w
    eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
	if 0;
$0 =~ s/^.*?(\w+)$/$1/;

# (p)lex - Perl lexical analyzer generator
# History:  Sep 16 2000: Original version.

use strict;
use Carp;
use Getopt::Long;

#######
# Input file handling
#
my( $TheFile, $isEOF );

sub openARGV(){
    $TheFile = shift( @ARGV );
    open( ARG, "<$TheFile" )
    || die( "$0: can't open $TheFile for reading ($!)\n" );
    $isEOF = 0;
}

sub getsARGV(;\$){
    my $argref = @_ ? shift() : \$_; 
    while( $isEOF || ! defined( $$argref = <ARG> ) ){
	close( ARG );
	return 0 unless @ARGV;
	openARGV();
    }
    1;
}

sub eofARGV(){
    return @ARGV == 0 && ( $isEOF = eof( ARG ) );
}


######
# Error handling
#
my $nError = 0;
sub Error($){
    my( $msg ) = @_;
    warn( "$0: $TheFile:$.: $msg\n" );
    $nError++;
}

######
# Options
#
my( $doDebug, $doMain, $doHelp, $doFold, $doSkip, $doVerbose,
    $Package, $Output, $Version );
$Version = '1.0';
$doDebug = $doMain = $doHelp = $doFold = $doSkip = 0;
$Package = 'plex';

######
# Compile time data structures for collecting information 
#
my( %Define, %Code, %StartCond, @Rule, $noReject, $noCut );
$StartCond{INITIAL} = 's';
$StartCond{'*'}     = 's';
$noReject = 1;
$noCut    = 1;

my $state = 'defs'; # -> 'rule' -> 'code'
my $verba = 0;
my $INF = 0x7fffffff;

# trStartCond - translate start condition from namestring to numeric
#
sub trStartCond($){
    my $cond = shift();
    if( exists( $StartCond{$cond} ) ){
	return $StartCond{$cond}{int};
    } else {
	Error( "undefined start condition `$cond'" );
	return 0;
    }
}

# addCode( 'def'|'init'|'user', <text> )
#    add some text to one of the code chunks
sub addCode($$){
    my( $which, $text ) = @_;
    if( ! exists( $Code{$which} ) ){
        $Code{$which} = { start => "$TheFile:$.", text => '' };
    }
    $Code{$which}{text} .= $text;
}

sub emitCode($$){
    my( $which, $fh ) = @_;
    if( exists( $Code{$which} ) ){
        $Code{$which}{start} =~ /^(.*):(\d+)$/;
        my( $file, $line ) = ( $1, $2 );
        $file =~ s/\\/\\\\/g;
        $file =~ s/"/\\"/g;
        print $fh "#line $line \"$file\"\n";
        print $fh $Code{$which}{text};
    }
}


# parseRule( <line> ) - parse 1st line of a rule entry
#   returns: reference to array of start conditions
#            the converted pattern
#            maximum number of chars the pattern can match
#            whether the pattern contains a trailing context
#            the Perl action code line
#            whether the pattern begins with `^'
#            whether the pattern ends with `$'
#
sub peek(\$$){
    my( $pref, $ic ) = @_;
    $ic < length($$pref)-1 ? substr( $$pref, $ic+1, 1 ) : '';
}
sub inc(\$$){
    my( $cntref, $inc ) = @_;
    confess( "undef" ) unless defined( $$cntref );
    $$cntref += $inc if $$cntref < $INF;
}
sub parseRule($){
    my( $line ) = @_;

    ### print "parsing: $line\n";

    my $res = '';
    my $bracklev = 0;
    my @parlev = ( 0 );
    my @curlen;
    my $lastlen = 0;
    my $literal;
    my $litlen;
    my $maxlen = 0;
    my $trailer = 0;
    my $begline = 0;
    my $endline = 0;
    $begline = 1 if $line =~ s/^\^//;
    my $ic;
    for( $ic = 0; $ic < length( $line ); $ic++ ){
        my $c = substr( $line, $ic, 1 );
	if( defined( $literal ) ){
	    if( $c eq '\\' ){
		### backslash escapes
		my $nc = peek($line,$ic);
		if( $nc eq '' ){
		    Error( "Bad RE `$line': `\\' cannot be last" );
		    return undef();
		}
		$ic++;
		if( $nc eq '\\' ){
		    $literal .= '\\';
		    $litlen++;
		    
		} elsif( $nc eq '"' ){
		    $literal .= '"';
		    $litlen++;
		} else {  ## should this be an error: \ not preceding \ or "
		    $literal .= '\\' . $nc;
		    $litlen++;
		}
	    } elsif( $c eq '"' ){
		$res .= '(?:' . quotemeta( $literal ) . ')';
		inc( $maxlen, $lastlen = $litlen );
		undef( $literal );
	    } else {
		$literal .= $c;
   	        $litlen++;
	    }
	    next;
	}

        if( $c eq '\\' ){
	    ### backslash escapes
	    my $nc = peek($line,$ic);
	    if( $nc eq '' ){
		Error( "Bad RE `$line': `\\' cannot be last" );
		return undef();
	    }
	    $ic++;
	    if( $nc eq 'b' ){
		$res .= '\010';
		inc( $maxlen, $lastlen = 1 );
	    } elsif( $nc eq 'v' ){
		$res .= '\013';
		inc( $maxlen, $lastlen = 1 );
	    } elsif( $nc eq '\\' ){
		$res .= '\\\\';
		inc( $maxlen, $lastlen = 1 );
	    } elsif( $nc =~ m{[][().*|?+^\$\\afnrtx0-7/]} ){
		## check for \-escaped magics and ASCII escapes
		## [] () . * | ? + ^ $ \, octal, hex,  / 
		$res .= '\\' . $nc;
		inc( $maxlen, $lastlen = 1 );
	    } else { ## \ <char> => <char> ("as if `\' were not present")
		$res .= $nc;
		inc( $maxlen, $lastlen = 1 );
	    }

	} elsif( $c eq '.' ){ ## . => .
	    $res .= $c;
	    inc( $maxlen, $lastlen = 1 );

	} elsif( $c eq '|' ){ ## alternative
	    $res .= $c;
	    Error( "empty alternative in RE `$line' ($res...)" )
               unless $maxlen;
	    $parlev[-1] = $maxlen if $maxlen > $parlev[-1];
	    $maxlen = 0;

	} elsif( $c eq '/' ){ ## trailing context
	    if( $trailer ){
		Error( "Bad RE `$line': duplicate trailing context?" );
		return undef();
	    }
	    $res .= ')(';
	    $trailer = 1;

	} elsif( $c eq '"' ){ ## begin literal string
	    $literal = '';
            $litlen = 0;

	} elsif( $c eq '@' ){ ## escape @
	    $res .= "\\$c";
	    inc( $maxlen, $lastlen = 1 );

	} elsif( $c eq '$' ){ ## unescaped $ that's not last is not magic
  	    my $nc = peek($line,$ic);
	    if( $nc eq '' || $nc =~ /[ \t]/ ){
		$endline = 1;
	    } else {
		$res .= "\\$c"; 
		inc( $maxlen, $lastlen = 1 );
	    }

	} elsif( $c =~ /[*?+]/ ){ ## *?+ => *?+, but \-escape if leading
	    if( $maxlen == 0 ){
		Error( "Bad RE `$line': `$c' without preceding RE" );
		return undef();
	    } elsif( substr( $res, -1, 1 ) ne '*' ){
		$res .= $c;
		if( $c eq '?' ){
		    inc( $maxlen, $lastlen = 1 );
		} else {
		    $maxlen = $lastlen = $INF;
		}
	    }

	} elsif( $c eq '[' ){
	    ## parse []: [^...] [^]...] [-...]
	    my $add = '[';
	    if( peek($line,$ic) eq '^' ){
		$ic++;
		$add .= '^';
	    }
	    my $nc = peek($line,$ic);
	    if( $nc eq ']' || $nc eq '-' ){
		$add .= $nc;
		$ic++;
	    }
	    # check that [ is not trailing
	    if( $ic >= length( $line ) - 1 ){
		Error( "Bad RE `$line': `[' cannot be last" );
		return undef();
	    }
	    # look for [:...:] and x-y
	    my $rstr = substr( $line, $ic+1 );
	    if( $rstr =~ /^((?:\[:\(\w+\):\]|[^]-](?:-[^]])?)*)/ ){
	        my $cnt = $1;
		$cnt =~ s/\[:blank:\]/ \t/g;
		$ic += length( $cnt );
		$cnt =~ s{([\$/])}{\\$1}g; # $ / are magic in Perl /[]/
		# try some simplifications
		my $red = $cnt;
		if( $red =~ s/0-9// ){
		    $cnt = $red.'\d';
		    if( $red =~ s/A-Z// && $red =~ s/a-z// && $red =~ s/_// ){
			$cnt = $red.'\w';
		    }
		}
		$add .= $cnt;
	    }
            ## may have a trailing `-' before `]'
            if( $ic < length($line) - 1 &&
		substr( $line, $ic+1 ) =~ /^(-?])/ ){
                $ic += length( $1 );
		$add .= $1;
		$add =~ s/^\[(\^?)(\\[dw])]$/ $1 eq '^' ? uc($2) : $2 /e;
                $res .= $add;
            } else {
                Error( "Bad RE `$line': unmatched `['" );
                return undef();
	    }
  	    inc( $maxlen, $lastlen = 1 );

        } elsif( $c eq '(' ){ ## ( => (?:
	    push( @curlen, $maxlen = 0 );
            push( @parlev, $maxlen );
	    $res .= '(?:';
	} elsif( $c eq ')' ){ ## ) => )
	    $parlev[-1] = $maxlen if $maxlen > $parlev[-1];
            $lastlen = pop( @parlev );
	    if( @parlev == 0 ){
                Error( "Bad RE `$line': unmatched `)'" );
                return undef();
            }
            $maxlen = pop( @curlen );
	    inc( $maxlen, $lastlen );
            $res .= ')';

        } elsif( $c eq '{' ){ ## repetition factor {<i>[,[<j>]]}
	    my $endpos = index( $line, '}', $ic );
            if( $endpos < 0 ){
                Error( "Bad RE `$line': `{' without closing `}'" );
                return undef();
	    }
            my $rep = substr( $line, $ic+1, $endpos-($ic+1) );
            if( exists( $Define{$rep} ) ){
		$res .= '(?:';
  	        push( @curlen, $maxlen = 0 );
                push( @parlev, $maxlen );
                substr( $line, $ic+1, $endpos-$ic ) = $Define{$rep} . ')';
            } elsif( $maxlen == 0 ){
		Error( "Bad RE `$line': `{$rep}' without preceding RE" );
		return undef();
	    } elsif( $rep =~ /^(\d+)(,?)(\d*)?$/ ){
                $ic = $endpos;
                my $min = $1;
                my $com = $2 || '';
		my $max = $3;
                my $app = "{$min$com$max}";
		if( length( $max ) ){
		    if( $max < $min ){
			Error( "Bad RE `$line': maximum less than minimum in `{$rep}'" );
			return undef();
		    }
                } elsif( $com eq ',' ){
		    $max = '';
		    $maxlen = $lastlen = $INF;
                } else {
                    $max = $min;
		}
                if( $max ne '' ){
		    if( $res =~ '\)$' ){
                        $maxlen += $lastlen*($max-1) if $lastlen != $INF;
                    } else {
		        inc( $maxlen, $lastlen = $max );
                    }
                }
                $res .= $app;
            } else {
                Error( "Bad RE `$line': invalid repeat clause `{$rep}'" );
                return undef();
	    }

        } elsif( $c eq ']' ){ ## unmatched ] is not magic
	    $res .= ']';
	    inc( $maxlen, $lastlen = 1 );

	} elsif( $c eq '^' ){ ## not magic unless 1st, but in Perl: \-quote
	    $res .= length( $res ) ? '\\^' : '^';
	    inc( $maxlen, $lastlen = 1 );

        } elsif( $c =~ /[ \t]/ ){ ## unescaped white space terminates pattern 
	    last;

	} else {
	    $res .= $c;
	    inc( $maxlen, $lastlen = 1 );
	}
    }
    $parlev[-1] = $maxlen if $maxlen > $parlev[-1];

    if( @parlev > 1 ){
        Error( "Bad RE `$line': unbalanced open `('" );
        return undef();
    }
    substr( $line, 0, $ic ) = '';
    $line =~ s/^\s+//;

    return ( $res, $parlev[-1], $trailer, $line,
             $begline, $endline );
}


############
### MAIN ###
############

# option parsing
#

die( "$0: errors in options\n" )
  unless GetOptions( 'debug!'      => \$doDebug,
                     'caseignore!' => \$doFold,
                     'help!'       => \$doHelp,
                     'main!'       => \$doMain,
                     'output=s'    => \$Output,
                     'package=s'   => \$Package,
                     'skipdef!'    => \$doSkip,
                     'verbose!'    => \$doVerbose );

if( $doHelp ){
    print STDERR "$0 [--debug] [--foldcase] [--help] [--main] [--output=file]\n";
    print STDERR ' ' x length( $0 ), " [--package=string] [--skipdef] [--verbose] file...\n";
    exit( 0 );
}

print STDERR "$0 version $Version\n" if $doVerbose;

# read & process all input files
#
unshift( @ARGV, '-' ) unless @ARGV;
openARGV();

while( getsARGV() ){
    chomp();
    if( $state eq 'defs' ){

        # definitions
        #
        if( $verba ){
            if( /^%}\s*$/ ){
	        $verba = 0;
            } else {
	        addCode( 'def', "$_\n" );
            }
	} elsif( /^\s*$/ ){
	    # skip empty lines
	} elsif( /^#/ ){
	    # skip unindented comment lines
        } elsif( /^%%\s*$/ ){
	    $state = 'rule';
	} elsif( /^([[:alpha:]_][[:alpha:]_[:digit:]-]*)\s+(.*)$/ ){
	    my( $def, $val ) = ( $1, $2 );
	    if( exists( $Define{$def} ) ){
	        Error( "redefining $def\n" );
            }
	    $Define{$def} = $val;

        } elsif( /^%([sx])\s+(.*)$/ ){
	    my $kind = $1;
	    my @states = split( ' ', $2 );
	    for my $state ( @states ){
                Error( "duplicate state definition" )
	              if exists( $StartCond{$state} );
		$StartCond{$state} = $kind; # 's' or 'x'
            }

        } elsif( /^\s+/ ){
	    addCode( 'def', "$_\n" );
        } elsif( /^%{\s*$/ ){
	    $verba = 1;
        } else {
	    die( "unexpected input in definition section" );
        }

    } elsif( $state eq 'rule' ){

        # rules
        #
        if( $verba ){
            if( s/^%}\s*$// ){
	        $verba = 0;
            } else {
                if( @Rule ){
		    $Rule[-1]{action} .= "$_\n";
                } else {
                    addCode( 'init', "$_\n" );
                }
            }
	} elsif( /^\s*$/ ){
	    # skip blank lines
        } elsif( /^#/ ){
	    # skip unindented comments
        } elsif( /^%%\s*$/ ){
	    $state = 'code';
        } elsif( /^%{\s*$/ ){
	    $verba = 1;
	} elsif( /^\S/ ){
            my @starts = ();
            # check for start conditions
            if( s/^<([^>]+)>// ){
    	        @starts = map { s/^\s+//; s/\s+$//; $_ } split( ',', $1 );
            }
            my( $pat, $mlen, $trail, $act, $bol, $eol ) = parseRule( $_ );
            next unless defined( $pat ); # error parsing rule
            my $opat;
            if( length( $act ) ){
                $opat = substr( $_, 0, index( $_, $act ) - 1 );
            	$act .= "\n";
            } else {
                $opat = $_;
            }

	    my $nxtact = 0;
	    if( $act =~ s/^%{\s*$// ){
                $verba = 1;
            } elsif( $act =~ s/^\|\s*$// ){
                $nxtact = 1;
            }

            # check start conditions
            for my $c ( @starts ){
                Error( "undeclared start condition `$c'" )
                  unless exists( $StartCond{$c} );
            }

	    push( @Rule, { conds    => \@starts,
			   pattern  => $pat,
			   original => $opat,
			   maxlen   => $mlen,
			   trailer  => $trail,
                           begline  => $bol,
                           endline  => $eol,
			   nextact  => $nxtact,
			   action   => $act,
                           srcline  => "$TheFile:$." } );
            


        } else {
	    Error( "unexpected input in rule section" );
        }

    } else {
	addCode( 'user', "$_\n" );    
    }
}

die( "errors in input\n" ) if $nError;


# add default rule
#
if( ! $doSkip ){
    push( @Rule, { conds    => [ '*' ],
	           pattern  => '.|\n',
	           original => '.|\n',
	           maxlen   => 1,
                   trailer  => 0,
	           begline  => 0,
	           endline  => 0,
	           nextact  => 0,
	           action   => "Echo;\n",
                   srcline  => "default" } );
}

# compute guards
#
my $astguard = '1';
my $no_guard = '1';
my $iguard = 1;
delete $StartCond{INITIAL};
delete $StartCond{'*'};
for my $key ( sort( keys( %StartCond ) ) ){
    my $cat = $StartCond{$key};
    $no_guard .= $cat eq 's' ? '1' : '0';
    $astguard .= '1';
    $StartCond{$key} = { cat => $cat, int => $iguard++ };
}
$StartCond{INITIAL} = { cat => 's', int => 0 };

for( my $ir = 0; $ir < @Rule; $ir++ ){
    my $guard;
    if( @{$Rule[$ir]{conds}} ){
	$guard = '0' x $iguard;
        for my $c ( @{$Rule[$ir]{conds}} ){
	    if( $c eq '*' ){
		$guard = $astguard;
		last;
	    }
	    substr( $guard, $StartCond{$c}{int}, 1 ) = '1';
        }
    } else {
	$guard = $no_guard;
    }
    $Rule[$ir]{guard} = $guard;

    my $action = $Rule[$ir]{action};

    # Echo -> print $yytext to current output
    $action =~ s/Echo\s*(?:\(\s*\))?/print \$yyout \$yytext/g;

    # Begin(<name>) -> yy_start( <number> )
    $action =~ s{Begin(?:\s+([[:alpha:]_][\w-]*)|\s*\(\s*([[:alpha:]_][\w-]*)\s*\))}
                {'yy_start(' . trStartCond($1||$2) . ')'}ge;

    # Reject -> goto yyLr
    $noReject &&= $action !~ s/Reject(?:\s*\(\s*\))?/goto yyLr/g;

    # Cut -> goto yyLr
    if( $action =~ s/Cut(?:\s*\(\s*\))?\s*;?//g ){
        $noCut = 0;
        $Rule[$ir]{cut} = 1;
    }

    $Rule[$ir]{action} = $action;
}

# optimize guard tests
#   {nxtguard} := number of subsequent rule with different guard | 0 = no guard

for( my $ir = 0; $ir < @Rule; $ir++ ){
    my $guard = $Rule[$ir]{guard};
    my $jr;
    for( $jr = $ir + 1; $jr < @Rule; $jr++ ){
        if( $Rule[$jr]{guard} eq $guard ){
            $Rule[$jr]{nxtguard} = 0;
        } else {
            last;
        }
    }
    $Rule[$ir]{nxtguard} = $jr;
    $ir = $jr - 1;
}

if( $doVerbose ){
    print STDERR "Definitions:\n";
    for my $key ( sort( keys( %Define ) ) ){
        printf STDERR "  %-15s %s\n", $key, $Define{$key};
    }

    print STDERR "Start Conditions:\n";
    for my $key ( sort( keys( %StartCond ) ) ){
        printf STDERR "  %3d %s (%sclusive)\n",
                      $StartCond{$key}{int}, $key,
                      $StartCond{$key}{cat} eq 's' ? 'in' : 'ex';
    }

    print STDERR "Rules:\n";
    for( my $ir = 0; $ir < @Rule; $ir++ ){
        print STDERR "  rule $ir (at $Rule[$ir]{srcline}):\n";
        print STDERR "    conditions:  " .
                     join( ',', @{$Rule[$ir]{conds}} ) . "\n";
        print STDERR "    pattern (l): " . $Rule[$ir]{original} . "\n";
        print STDERR "    pattern (p): " . $Rule[$ir]{pattern}  . "\n";
        print STDERR "    begline:     " . $Rule[$ir]{begline};
        print STDERR "    endline:     " . $Rule[$ir]{endline};
        print STDERR "    maxlen:      " .
                     ( $Rule[$ir]{maxlen} == $INF ? 'unlimited'
                                                  : $Rule[$ir]{maxlen} ) .
                     "\n";
        print STDERR "    actions:     " . $Rule[$ir]{action};
        print STDERR "\n" unless length( $Rule[$ir]{action} );
        print STDERR "    guard:       " . $Rule[$ir]{guard} . "\n";
    }

    print STDERR "Definition Code:\n",
                 exists( $Code{def} ) ? $Code{def}{text} : '';
    print STDERR "Initialization Code:\n",
                 exists( $Code{init} ) ? $Code{init}{text} : '';
    print STDERR "User Code:\n",
                 exists( $Code{user} ) ? $Code{user}{text} : '';
}

######
# Code Generation
#
my $fh;
if( defined( $Output ) ){
    open( $fh, ">$Output" )
    || die( "$0: $Output: error opening for output ($!)\n" );
} else {
    $fh = *STDOUT;
}

if( $doMain ){
    print $fh "#!/usr/bin/perl -w\n\n";
}

### >>>>>>>>>
print $fh <<TheEnd;
package $Package;
# generated by $0 version $Version

use strict;
use Carp;

use vars qw{ \$yyin \$yytext \$yyleng \$yyout \$yy_plex_debug };
\$yyin   = *STDIN;
\$yytext = '';
\$yyleng = 0;
\$yyout  = *STDOUT;
\$yy_plex_debug = $doDebug;

our( \$yyCurBuf, \$yyAppToken, \@yyRule, \@yyMatchlen, \%yyCond );
\$yyCurBuf = ${Package}::Buffer->yy_create_buffer( \\\$yyin );

sub yy_current_buffer(){
    return \$yyCurBuf;
}
TheEnd
### <<<<<<<<<

emitCode( 'def', $fh );

# Define start conditions

for( my $ir = 0; $ir < @Rule; $ir++ ){
    my $ppat = $Rule[$ir]{pattern};
    $ppat =~ s/\\/\\\\/g;
    $ppat =~ s/'/\\'/g;
    my $opat = $Rule[$ir]{original};
    $opat =~ s/\\/\\\\/g;
    $opat =~ s/'/\\'/g;
    print $fh "\$yyRule[$ir] = { pattern  => '" . $ppat . "',\n";
    print $fh "  original => '" . $opat . "',\n";
    print $fh "  guard    => '" . $Rule[$ir]{guard}   . "',\n";
    print $fh "  maxlen   => "  . $Rule[$ir]{maxlen}  . ",\n";
    print $fh "  begline  => "  . $Rule[$ir]{begline} . ",\n";
    print $fh "  endline  => "  . $Rule[$ir]{endline} . ",\n";
    print $fh "  srcline  => '" . $Rule[$ir]{srcline} . "',\n";
    print $fh "};\n";
}

print $fh "%yyCond = (\n";
for my $cond ( keys( %StartCond ) ){
    print $fh "   $cond => $StartCond{$cond}{int},\n";
}
print $fh ");\n";

### >>>>>>>>>
print $fh <<'[TheEnd]';
# Start Conditions
#
# CurCond indicates the currently set condition
#
my $CurCond   = 0; ## INITIAL == 0, hardcoded
my @CondStack = ();

# yy_start
#
sub yy_start($){
    confess( "usage: yy_start([<condition>])" ) unless @_ <= 1;
    if( @_ ){
	my $cond = shift();
	if( $cond =~ /^\d+$/ ){
	    $CurCond = $cond;
	} elsif( defined( $yyCond{$cond} ) ){
	    $CurCond = $yyCond{$cond};
	} else {
	    confess( "yy_start: condition `$cond' not numeric, not known" );
	}
[TheEnd]

if( $doDebug ){
    print $fh <<'[TheEnd]';
        if( $yy_plex_debug ){
	    for my $c ( keys( %yyCond ) ){
		if( $CurCond == $yyCond{$c} ){
		    print STDERR "-- Begin $c\n";
		    last;
		}
	    }
        }
[TheEnd]
}

print $fh <<'[TheEnd]';
    }
    return $CurCond;
}

# Start Condition Stack (not in POSIX)
#
sub yy_push_state($){
    push( @CondStack, $CurCond );
    $CurCond = shift();
}

sub yy_pop_state($){
    confess( "start condition stack underflow" ) unless @CondStack;
    $CurCond = pop( @CondStack );
}

sub yy_top_state($){
    confess( "start condition stack underflow" ) unless @CondStack;
    return $CondStack[-1];
}

# unput: return a string to the buffer
#
sub unput($){
    my $s = shift();
    my $l = length( $s );
    if( $yyCurBuf->{eaten} >= $l ){
	$yyCurBuf->{eaten} -= $l;
        substr( $yyCurBuf->{buffer}, $yyCurBuf->{eaten}, $l ) = $s;
    } else {
        $yyCurBuf->{buffer} = $s . substr( $yyCurBuf->{buffer}, $yyCurBuf->{eaten} );
	$yyCurBuf->{eaten} = 0;
    }
}

# yyless: return n characters from the end of the current token to the buffer
#
sub yyless($){
    my $n = shift();
    if( $n < length( $yytext ) ){
	unput( substr( $yytext, $n ) );
	substr( $yytext, $n ) = '';
	$yyleng = $n;
    }
}

# input: get next character from the current input buffer
#
sub input(){
    if( $yyCurBuf->{eaten} >= length( $yyCurBuf->{buffer} ) ){
        my $rl = &{$yyCurBuf->{YY_INPUT}}();
	return undef() if $rl == 0;
    }
    my $c = substr( $yyCurBuf->{buffer}, $yyCurBuf->{eaten}, 1 );
    $yyCurBuf->{eaten}++;
    return $c;
}

# yymore: set yyAppToken flag, to append the next token to $yytext
#
sub yymore(){
    $yyAppToken = 1;
}

[TheEnd]

print $fh "package ${Package}::Buffer;\nuse strict;\n\n";

print $fh <<'[TheEnd]';
# Buffer handling
#
# yy_create_buffer: constructor with filehandle
#
sub yy_create_buffer($$%){
    my( $class, $handle, %opts ) = @_;
    my $obj = {
	handle   => $handle,
	buffer   => '',
	eaten    => 0,
	atEOF    => 0,
        YYWRAP   => \&yywrap,
        YY_INPUT => \&yy_input,
        %opts
    };
    bless( $obj, $class );
}

# yy_new_buffer: constructor; alias for yy_create_buffer
#
sub yy_new_buffer($$@){
    my( $class, $refhandle, @opts ) = @_;
    yy_create_buffer( $class, $refhandle, @opts );
}

# yy_switch_to_buffer: method to make object the current buffer
#
sub yy_switch_to_buffer($){
    $yyCurBuf = shift();
}

# yy_delete_buffer: destructor
#
sub yy_delete_buffer($){
    my $self = shift();
    die( "cannot delete current buffer\n" ) if $self == $yyCurBuf;
    my $fh;
    close( $fh ) if defined( $fh = $self->{handle} );
    undef( %$self );
}

# yy_flush_buffer: method to empty current buffer
#
sub yy_flush_buffer($){
    my $self = shift();
    $self->{buffer} = '';
    $self->{eaten} =  0;
}

# yy_scan_string: constructor with string
#
sub yy_scan_string($$%){
    my( $class, $string, %opts ) = @_;
    my $obj = {
	handle   => undef(),
	buffer   => $string,
        eaten    => 0,
        YYWRAP   => sub { 1 },
        YY_INPUT => sub { 0 },
        %opts
    };
    bless( $obj, $class );
}

# yy_scan_bytes: constructor with initial substring of a string
#
sub yy_scan_bytes($$$%){
    my( $class, $string, $len, %opts ) = @_;
    my $obj = {
	handle   => undef(),
	buffer   => substr( $string, 0, $len ),
        eaten    => 0,
        YYWRAP   => sub { 1 },
        YY_INPUT => sub { 0 },
        %opts
    };
    bless( $obj, $class );
}

# yy_input: default method for reading input for a buffer with filehandle
#
sub yy_input($){
    my $self = shift();
    my $fh = $self->{handle};
    if( defined( $fh ) ){
	return 0 if $self->{atEOF};
	$fh = $$fh if ref( $fh );
	my $savsep = $/;
	$/ = undef();
	$self->{buffer} = <$fh>;
	$/ = $savsep;
	$self->{atEOF} = 1;
        return length( $self->{buffer} ) if defined( $self->{buffer} );
    }
    return 0;
}

# YY_INPUT: method for setting/calling the yy_input method
#
sub YY_INPUT($;$){
    my( $self, $input ) = @_;
    if( defined( $input ) ){
        $self->{YY_INPUT} = $input;
    } else {
        &{$self->{YY_INPUT}}( $self );
    }
}


# yywrap: default method for answering: Is there more input after EOF?
#
sub yywrap(){
    1;
}

# YYWRAP: method for setting/calling the yywrap method
#
sub YYWRAP($;$){
    my( $self, $wrap ) = @_;
    if( defined( $wrap ) ){
        $self->{YYWRAP} = $wrap;
    } else {
        my $rc = &{$self->{YYWRAP}}( $self );
        if( ! $rc ){
	    $self->{atEOF} = 0;
	    $self->yy_flush_buffer();
        }
	return $rc;
    }
}


# yyrestart:
#   $buffer->yyrestart( $handle ) ... acts on specified buffer
#   yyrestart( $handle )  ... acts on current buffer
#
sub yyrestart($;$){
    my( $self, $fh );
    if( ref( $_[0] ) ){
        ( $self, $fh ) = @_;
    } else {
	( $self, $fh ) = ( $yyCurBuf, $_[0] );
    }
    $self->{handle} = $fh;
    $self->{buffer} = '';
    $self->{eaten}  =  0;
}

[TheEnd]

print $fh <<TheEnd;
package ${Package};

# yylex: the scanner routine
#
sub yylex{
  # User's yylex initialization
TheEnd

emitCode( 'init', $fh );

print $fh <<'[TheEnd]';
    $yyAppToken = 0;

    while( 1 ){
	my $yyBufref = \$yyCurBuf->{buffer};
	while( $yyCurBuf->{eaten} >= length( $$yyBufref ) ){
	    if( ! $yyCurBuf->YY_INPUT() ){
		return undef() if $yyCurBuf->YYWRAP();
		$yyBufref = \$yyCurBuf->{buffer};
	    }
	}

        my $yyml;
	my $yyMmlen = -1;
	my $yyMmpos = -1;
	my $yyEaten  = $yyCurBuf->{eaten};
[TheEnd]
### <<<<<<<<<

print $fh '@yyMatchlen = ( 0 ) x ' . @Rule . ";\n";

my( $in, $ng );
my $iopt = $doFold ? 'i' : '';
for( my $ir =  0; $ir < @Rule; $ir++ ){
    my $ruleref = $Rule[$ir];
    $in = $ir + 1;
    $ng = $ruleref->{nxtguard};
    print $fh "yyS$ir:\n";
    if( $ng > 0 ){
        print $fh "        goto yyS$ng unless substr( \"$ruleref->{guard}\", \$CurCond, 1 );\n";
    }
    if( $noReject && $ruleref->{maxlen} < $INF ){
        print $fh "        goto yyS$in if \$yyMmlen >= $ruleref->{maxlen};\n";
    }
    if( $ruleref->{begline} ){
        print $fh "        goto yyS$in if \$yyEaten > 0 && substr( \$\$yyBufref, \$yyEaten-1, 1 ) ne \"\\n\";\n";
    }

print $fh <<TheEnd;
        pos( \$\$yyBufref ) = \$yyEaten;
	if( \$\$yyBufref =~ /\\G($ruleref->{pattern})/g$iopt ){
	    \$yyMatchlen[$ir] = \$yyml = length( \$1 );
TheEnd

    if( $ruleref->{endline} ){
        print $fh "            goto yyS$in if \$yyEaten + \$yyml < length( \$\$yyBufref ) && substr( \$\$yyBufref, \$yyEaten+\$yyml, 1 ) ne \"\\n\";\n";
    } 

print $fh <<TheEnd;
	    if( \$yyml > \$yyMmlen ){
		 \$yyMmlen = \$yyml;
		 \$yyMmpos = $ir;
	    }
TheEnd

    if( $ruleref->{cut} ){
	print $fh "            goto yyLx if \$yyml;\n";
    }
    print $fh "        }\n";
}

print $fh "yyS$in: ;\n";
if( $doSkip ){
    print $fh <<'[TheEnd]';
        my $eol = index( $$yyBufref, "\n", $yyEaten );
        die( "not matched: " . substr( $$yyBufref, $yyEaten, $eol+1-$yyEaten ) . "\n" );
[TheEnd]
}

### >>>>>>>>>
print $fh <<TheEnd;
      yyLx:
	die( "internal error" ) if \$yyMmpos == -1;

	if( \$yyAppToken ){
	    \$yytext .= substr( \$\$yyBufref, \$yyEaten, \$yyMmlen );
	    \$yyAppToken = 0;
	} else {
	    \$yytext = substr( \$\$yyBufref, \$yyEaten, \$yyMmlen );
	}
        \$yyleng = length( \$yytext );
        \$yyCurBuf->{eaten} += \$yyMmlen;
TheEnd

if( $doDebug ){        
    print $fh <<'[TheEnd]';
        print STDERR "-- #$yyMmpos ($yyRule[$yyMmpos]{srcline}): $yyRule[$yyMmpos]{original} matches \"$yytext\"\n" if $yy_plex_debug;
[TheEnd]
}

print $fh <<'[TheEnd]';
	goto "yyL$yyMmpos";
[TheEnd]
### <<<<<<<<<

for( my $ir = 0; $ir < @Rule; $ir++ ){
    # `#line' must precede the label
    if( length( $Rule[$ir]{action} ) &&
        $Rule[$ir]{srcline} =~ /^(.*):(\d+)$/ ){
	my( $file, $line ) = ( $1, $2 );
	$file =~ s/\\/\\\\/g;
	$file =~ s/"/\\"/g;
	 print $fh "#line $line \"$file\"\n";
    }
    print $fh "yyL$ir:";
    if( ! $Rule[$ir]{nextact} ){
	print $fh "\n";
	if( length( $Rule[$ir]{action} ) ){
	    print $fh $Rule[$ir]{action};
	}
        print $fh "  next;\n";
    } else {
	print $fh "; "; # required after a label
    }
}

if( ! $noReject ){
### >>>>>>>>>
    print $fh <<'[TheEnd]';
      yyLr:
[TheEnd]


    if( $doDebug ){
        print $fh <<'[TheEnd]';
        print STDERR "-- Reject"\n" if $yy_plex_debug;
[TheEnd]
    }

    print $fh <<'[TheEnd]';
	unput( $yytext );
        $yyMatchlen[$yyMmpos] = 0;
	$yyMmlen = 0;
	$yyMmpos = -1;
	for( my $irule =  0; $irule < @yyRule; $irule++ ){
	    next unless substr( $yyRule[$irule]{guard}, $CurCond, 1 );
   	    my $ml;
	    if( ($ml = $yyMatchlen[$irule]) > $yyMmlen ){
		$yyMmlen = $ml;
		$yyMmpos = $irule;
	    }
	}
	goto yyLx;
[TheEnd]
### <<<<<<<<<
}

### >>>>>>>>>
print $fh <<TheEnd;
    }
}

TheEnd

emitCode( 'user', $fh );

print $fh "1;\n";
### <<<<<<<<<

if( $doMain ){
    print $fh <<TheEnd;

package main;
use strict;
while( my \@res = ${Package}::yylex() ){
    last unless defined( \$res[0] );
    print join( ', ', \@res ), "\\n";
}
TheEnd
}




=head1 NAME

plex - Perl lexical analyzer generator

=head1 SYNOPSIS

Z<> Z<> Z<> Z<>B<plex> [I<options>...] [I<file>...]

=head1 DESCRIPTION

The B<plex> utility is a tool for generating a Perl module that performs
pattern matching on text, such as commonly required for compilers and
similar parsing tasks. B<plex> reads the given input files, or its
standard input if no file names are given, for a description of the
scanner to generate. The body of this description defines pairs of
regular expressions and snippets of Perl code, called I<rules>. After
matching the regular expressions against the scanner's input, one of
the rules is selected, resulting in the execution of its code.

B<plex> generates as output a Perl module (a package plex) on a file
F<plex.pm>. This package may then be used in a Perl program, where
its main subroutine C<plex::yylex> can be called to process the
scanner program's input.

B<plex> is similar (but, for obvious reasons, not conforming) to 
the I<lex> tools found on UNIX systems: Given a I<lex> input file,
the C code pieces in the rules have to be rewritten as Perl code.
Note, however, that neither the patterns of I<lex> rules nor the
definitions of named patterns and start conditions require any changes.


=head1 FORMAT OF THE INPUT FILE

A B<plex> input file consists of three sections, separated by a line
with just B<%%> in it:

   definition section
   %%
   rule section
   %%
   user code section


=head2 Definition Section

The definition section may contain I<name definitions>,
I<start conditions>, and I<definition code>. Comment lines may be
written in the definitions section using `C<#>' as the first
character of a line. 

=over 4

=item * I<Name definition>

A name definition must begin in column one. It associates a I<name>
with a I<pattern>. A I<name> begins with a letter or underscore
(`C<_>') and may be followed by letters, digits, underscores or
dashes (`C<->'). The C<pattern> begins with the first non-white-space
character after the name continuing to the end of the line. The name
may be referred to in the pattern of a rule: using B<{>I<name>B<}>
as part of a pattern substitutes the named pattern.

=item * I<Start condition>

Start conditions are defined in lines beginning with either `C<%s>'
or `C<%x>' in column one, followed by some white space and a
white-space separated list of names. Each name may then be
used as a start condition for a rule: A rule prefixed by one or
more start conditions will only be considered if the currently
active start condition is among them. Rules without a start
condition are active if the current start condition is an
I<inclusive> start condition (defined with `C<%s>'). If, however,
the current start condition is an I<exclusive> start condition
(defined with `C<%x>') all> rules not naming this condition are
disabled.

=item * I<Definition code>

Any indented line is assumed to contain Perl code, to be added to
the generated output. Also, a line containing just `C<%{>' indicates
that subsequent lines contain Perl  code, terminated by a line with nothing
but `C<%}>'. Definition code will be incorporated into the generated
package. It may be used for the definition of package global variables
and initializations.

=back


=head2 Rule Section

Preceding the first rule, some I<initialization code> may be written,
beginning with an indented line containing `C<%{>' and 
ending with a line containing a trailing `C<%}>'.
This code will be copied into the entry section of the
scanning routine C<plex::yylex>.

Any unindented line defines a I<rule>, consisting of an optional
I<condition list>, a I<pattern>, that ends with the first non-escaped
white space, and an I<action>.

A I<pattern> is written using an extended set of regular expressions:

 x      match the character `x' (unless x is magic)
 .      match any character except newline
 []     match a single character, as enumerated between
        the brackets. Ranges are defined with `-'. A
        leading `^' inverts the sense of matching. `\', '-'
        and `]' are magic (but can be \-escaped).

 r*     zero or more r's, where r is any regular expression
 r+     one or more r's
 r?     zero or one r's
 r{i,j} from i to j r's
 r{i,}  i or more r's
 r{i}   exactly i r's

 {name} the expansion of `name', enclosed in `(' and ')'
 "x"    the literal string `x'. Use `\' to escape `"' (and `\').
 \X     If X is `a', `b', `f', `n', `r', `t', or `v': the ASCII
        control character (as in C); otherwise the literal `X',
        used to escape charactes such as `*'.
 \0     the NUL character
 \ddd   the character with octal value ddd
 \xhh   the character with hexadecimal value hh

 (r)    matches RE r; parentheses are used for overriding
        precedence
 rs     the RE r followed by RE s
 r|s    either RE r or RE s

 r/s    matches r but only if RE s follows ("trailing context")
 ^r     matches r but only at the beginning of an input line
 r$     matches r but only at the end of an input line

Character classes may also contain the following I<class expressions>:

    [:alnum:]   [:alpha:]   [:ascii:]    [:blank:]
    [:cntrl:]   [:digit:]   [:graph:]    [:lower:]
    [:print:]   [:punct:]   [:space:]    [:upper:]
    [:word:]    [:xdigit:]

A negated character class not containing the newline character
matches "\n". 

A substituted pattern (using B<{>I<name>B<}>) is treated as if its
expansion were wrapped in parentheses.

A trailing context expression is used for determining a match (the
matched length will determine the ranking), but the corresponding
portion of the input will neither be made available in the variable
B<$yytext> nor will it be consumed.

The anchors `^' and `$' can only appear at the begin or end of
a pattern, respectively, and they cannot be used inside
parentheses. The pattern `C<^foo|bar>' matches both `C<foo>' or
`C<bar>', but both only at the beginning of a line.

A condition list is a comma-separated list of I<start conditions>,
enclosed in `C<E<lt>>' and `C<E<gt>>'. The construct `C<E<lt>*E<gt>>'
is an abbreviation for the list containing all start conditions
(including the default start condition C<INITIAL>).

An I<action> consists of a line of Perl code. Multiple lines may
be written by enclosing the code in `C<%{>' and `C<%}>'. The symbol
`C<|>' indicates that the action for the next rule is the action for
this rule. If there is no code, the matched text is ignored.

Whether there are any rules or not, an additional default rule is
added at the end. Its pattern is C<.|\n> (matching any character),
and the action is C<Echo>, i.e. the character is copied to the
output file.


=head2 User Code

All lines of the user code section will be appended to the
generated module.


=head1 HOW THE SCANNER PROCESSES ITS INPUT

Whenever the scanner routine B<yylex> in the generated module is
called it analyzes the input stream at the current position for
any matches with the patterns in the rules. The rule where the
longest portion of the input matches "wins"; when there is more
than one rule matching equal lengths, the rule that comes first
in the input file is preferred.

Once a match has been determined in this way, the corresponding
section of the input text is stored in B<$yytext>, and its length
in B<$yyleng>. Then the action of the rule is executed. This may
result in a return to the caller of B<yylex> (if so programmed),
the rejection of this match (again, by explicit programming),
or else continuation with the next scanning cycle.

Input data is handled through objects of the class C<plex::Buffer>.
The default buffer uses the filehandle stored in B<$yyin>,
which is initially set to C<STDIN>. To read from any other file
than C<STDIN>, you may (before the first call to B<yylex>):

=over 4

=item *

set B<$yyin> to some filehandle of your choice, or

=item *

apply the method B<yyrestart> with a filehandle argument to the
current input buffer object, which is available via B<yy_current_buffer>.

=back

Whenever the scanner reaches the end of a buffer object's data, the
method B<yywrap> is called. If it returns false, the scanner assumes
that another input file has been made available and continues reading
on the same buffer object. If B<yywrap> returns true, B<yylex> 
terminates scanning with a return value of C<undef>.
To access a sequence of files through the same buffer, the
buffer's B<yywrap> method has to be overwritten by subclassing or
simply by calling the method B<YYWRAP> with the rewritten method's
address as argument.

Reading input from a buffer object's file handle is done with the
method B<yy_input>, which reads the entire file into the buffer.
Other input strategies may be implemented, again by overwriting
B<yy_input> in a subclass, or by calling the method B<YY_INPUT> with
the new method's address as argument.

The example below shows how to redefine the B<yywrap> and B<yy_input>
methods for the default buffer, to process all files stored in C<@ARGV>,
and to read one line at a time.

   %{
     sub myWrap();
     die( "no input file\n" ) if myWrap();
     yy_current_buffer->YYWRAP( \&myWrap );
     yy_current_buffer->YY_INPUT( \&myInput );
   %}

   %%
   %%

   sub myWrap(){
     return 1 unless @ARGV;
     my $path = shift( @ARGV );
     open( FH, "<$path" ) || die( "$path: cannot open ($!)\n" );
     $plex::yyin = *FH;
     return 0;
   }

   sub myInput(){
     my $self = shift();
     my $fh = $self->{handle};
     if( defined( $fh ) ){
       return 0 if $self->{atEOF};
       $fh = $$fh if ref( $fh );
       if( defined( $self->{buffer} = <$fh> ) ){
         $self->{eaten} = 0;
         return length( $self->{buffer} );
       } else {
  	 $self->{atEOF} = 1;
         return 0;
       }
     } else {
       return 0;
     }
   }


=head1 ACTION PROGRAMMING

Actions can include arbitray Perl code, including return statements
to terminate the parsing loop and return a value to whichever routine
called B<yylex>. Each time B<yylex> is called, it proceeds to match
input from where it left off until it either reaches the end of the
input or a return is executed.

There are a number of predefined variables and functions available
for use within the action of a rule.

=head2 Variables

The variable B<$yytext> is set to the portion of the input buffer
matched by the winning rule's pattern. B<$yyleng> contains the length
of that string. You are free to modify either value.

B<$yyin> contains the filehandle for the default input file (STDIN).
It may be assigned to before processing begins, or after input has
been exhausted, to continue with another file.

B<$yyout> is the filehandle the B<Echo> directive uses for output.
It may be reassigned by the user.

These variables are accessible from code outside the C<plex> package.


=head2 Special Directives

The statement B<Echo> copies B<$yytext> to the scanner's output. (It
is translated to C<print> C<$plex::yyout> C<$plex::yytext>.)

A B<Begin> followed by the name of a start condition places the scanner
in the specified start condition. The start condition name may be
enclosed in parentheses. The default start condition may be specified
with C<INITIAL> or C<0>. B<Begin> is translated into a call to
B<yy_start(>I<n>B<)>, with the appropriate argument, i.e. the condition
name translated to some integer value.

The B<Reject> directive instructs the scanner to proceed with the
next best rule. Actions preceding B<Reject> are executed before the
branch into which B<Reject> is translated is taken. B<$yytext> and
B<$yyleng> are adjusted accordingly before the action of the next
best rule is executed.

A B<Cut> directive anywhere in the action of a rule causes immediate
termination of the pattern matching cycle if the pattern of that rule
matches with a length greater than zero, and the action of the best
match found so far will be executed. Note that after a B<Reject> in
that action only those rules will be reconsidered that were matched
prior to the B<Cut>.

   [ \t\n]+     Cut; # no need to try any other rule
   if|then|else Cut; return( 'KWD', $yymatch );

All of these directives cannot be used except in rule action code.


=head2 Functions for Controlling the Input Stream

The B<yymore(>I<n>B<)> function notifies the scanner to I<append> the
matched portion of the input to B<$yytext> next time a match occurs.

The function B<yyless(>I<n>B<)> takes an integer argument and makes
the scanner return all but the first I<n> characters of the current
match back to the input stream. B<$yytext> and B<$yyleng> are adjusted
accordingly.

Calling B<unput(>I<s>B<)> puts the string I<s> back onto the input
stream. Scanning will resume with the first character of that string.
(It is not an error to put back more characters than have been consumed
from a buffer, but doing so may slow down the scanner.)

The function B<input> reads the next character from the input stream.


=head2 Buffer Handling

B<plex::Buffer-E<gt>yy_create_buffer(>I<handle>B<)> creates a buffer object.
(B<yy_new_buffer> is an alias for B<yy_create_buffer>.) The I<handle> is the
file handle to be used for reading data. - Additional options can be
specified:

=over 4

=item B<YYWRAP =E<gt>> I<code-ref>

Defines a B<yywrap> method to be used when the input in this buffer
is exhausted.

=item B<YY_INPUT =E<gt>> I<code-ref>

Defines a B<yy_input> method used for reading data from the buffer's
file handle.

=back

B<$>I<buffer>B<-E<gt>yy_switch_to_buffer()> makes the buffer object
the current buffer.

B<$>I<buffer>B<-E<gt>yy_delete_buffer(> deletes the buffer object. It
is an error to delete the current buffer.

B<$>I<buffer>B<-E<gt>yy_flush_buffer()> empties the current buffer.
If B<yylex> processing continues on the buffer, more input will be
read with the buffer's B<yy_input> method.

B<yy_current_buffer> returns the current buffer object.


=head2 Condition Handling

If the function B<yy_start> is called without an argument it
returns an integer value representing the current state.
This value may be used as an argument to B<yy_start> to set
this state. B<yy_start> can also be used with a string argument
specifying a start condition. (Note that B<Begin>, which takes
a bareword condition name as an argument, can only be used
inside rule actions.)

Conditions may be stacked. The function B<yy_push_state(>I<condition>B<)>
pushes a condition on the stack; B<yy_pop_state()> pops and returns a
condition from the stack, and B<yy_top_state()> returns the topmost
condition from the stack. Accessing an empty stack is a fatal error.


=head1 THE GENERATED PERL MODULE

=head2 Insertion of User Code

The generated Perl module contains the package B<plex>. (The name
can be set with the B<--package> option.) 

Optional Perl code specified in the definition section of the input
is inserted at package level, near the immediately after the package header 
(and a `C<use strict;>'). The definitions of predefined variables
and subroutines follow.

The scanning routine B<yylex> is generated without a prototype, therefore
calls may pass arbitrary parameters. Since Perl code from the head of
the input's rule section (if any) is inserted right after the subroutine
heading, it may be used to handle parameter passing. Pattern matching and
rule actions are expanded into the body of the B<yylex> subroutine.

Code from the user code section concludes the generated module.

Variable, subroutine and label names beginning with `C<yy>' should be
considered as reserved for internal use.

=head2 Efficiency

B<plex> generates code where each pattern is matched individually,
and in the given order, with buffer contents at the current position.
After some match has been found, a subsequent rule with a pattern
that can only match a limited number of characters will not be considered
if that number is less than the length of the successful match. Using
B<Reject> (in any rule), however, prevents this optimization.

It is advantageous to keep rules with an identical set of start conditions
together since then they may be skipped after a single test.

The B<Cut> directive is another feature for improving efficiency, but
it has to be used with some caution: Care must be taken to ensure that
some later rule is not "cut off". Consider, for instance, these two
typical rules, parsing for keywords and identifiers:

   if|else|for|do|while     return( 'KWD', $yytext );
   [a-zA-Z_][a-zA-Z0-9_]*   Cut; return ( 'ID',  $yytext );

A B<Cut> in the first rule would be an error since any keyword
could be an initial substring of an identifier. Furthermore, if
B<Cut> and B<Reject> are used both, then after a B<Reject> is
executed, only those rules that were matches prior to the B<Cut>
will be reconsidered.


=head1 OPTIONS

B<plex> recognizes the following options:

=over 4

=item B<--debug>

Generates a scanner with additional code for debugging. Selected rules
are traced (on standard error):

Z<> Z<> Z<> Z<>-- #I<rule> (I<file>.I<line>): I<pattern> matches "I<string>"

where I<rule> is the rule number (as shown in the B<--verbose> output),
I<file> and I<line> the position of the rule's definition, and I<string> the
portion of the input matching the I<pattern>.  Also, changes of the start
condition and rule rejections (B<Reject>) are traced. Debug output may be
enabled and disabled dynamically by changing the variable B<$yy_plex_debug>,
which is initially set to true.

=item B<--caseignore>

The generated scanner is case-insensitive. The matched text in B<$yytext>
will contain the original input.

=item B<--help>

Instructs B<plex> to print its usage and exit.

=item B<--main>

The generated output includes a Perl main program containing a loop
with a call to B<yylex> and a print statement of the returned values.
(Note that this may not produce meaningful output if there are
references among the returned values.)

=item B<--output=>I<path>

Generated output is written to the specified file rather than
standard output.

=item B<--package=>I<name>

The generated package is given the specified name, instead of C<plex>.

=item B<--skipdef>

The default rule (echoing any unmatched character to standard output)
is omitted. On encountering an unmatched character, the program is
terminated with an appropriate error message.

=item B<--verbose>

Prints a summary of all definitions (conditions, names, rules and code)
to standard error.

=back


=head1 BUGS

The basic flaw of this implementation is the failure to implement a
pattern matching mechanism that will work with any input method used
for reading data. Since it is possible to define patterns that could
match anything from a single character to an entire file, the default
input method must read all of an input file to avoid risking failure
where there is a potential match. On the other hand, pulling in more
input while pattern matching is in progress would make it impossible
to translate rule patterns to Perl patterns, requiring the implementation
of pattern matching in Perl (which would be even more bizarre).


=head1 STANDARDS

The basic deviation from all standards is the generation of Perl code
instead of C code.

Some definitions contained in IEEE Std1003.2-1992 ("POSIX.2") do not
make sense with Perl code and have, therefore, not been implemented:

=over 4

=item * B<%array> and B<%pointer>

=item * internal table size settings (B<%p>, B<%n>, B<%a>, B<%e>, B<%k>, 
and B<%o>)

=item * Plain braces (B<{> and B<}>) are not sufficient for bracketing
actions across multiple lines; use B<%{> and B<}%>.

=item * No automatic main program generation.

=item * Trailing context and `C<$>' can be used in the same rule.

=back

B<Cut> is a B<plex> extension not available with any of the I<lex>
implementations.


=head1 AUTHOR

This Perl implementation of I<lex> was written by Wolfgang Laun,
I<Wolfgang.Laun@gmail.com>.


=head1 COPYRIGHT and LICENSE

This program is free and open software. You may use, modify,
distribute, and sell this program (and any modified variants) in any
way you wish, provided you do not restrict others from doing the same.

=cut

