yasm/modules/parsers/nasm/gen_instr.pl

#!/usr/bin/perl -w
# $IdPath$
# Generates NASM-compatible bison.y and token.l from instrs.dat.
#
#    Copyright (C) 2001  Michael Urman
#
#    This file is part of YASM.
#
#    YASM is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    YASM is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#

use strict;
use Getopt::Long;
my $VERSION = "0.0.1";

# useful constants for instruction arrays
#  common
use constant INST	    => 0;
use constant OPERANDS	    => 1;
#  general format
use constant OPSIZE	    => 2;
use constant OPCODE	    => 3;
use constant EFFADDR	    => 4;
use constant IMM	    => 5;
use constant CPU	    => 6;
#  relative target format
use constant ADSIZE	    => 2;
use constant SHORTOPCODE    => 3;
use constant NEAROPCODE	    => 4;
use constant SHORTCPU	    => 5;
use constant NEARCPU	    => 6;

use constant TOO_MANY_ERRORS => 20;

# default options
my $instrfile = 'instrs.dat';
my $tokenfile = 'token.l';
my $tokensource;
my $grammarfile = 'bison.y';
my $grammarsource;
my $showversion;
my $showusage;
my $dry_run;

# allow overrides
my $gotopts = GetOptions ( 'input=s' => \$instrfile,
			   'token=s' => \$tokenfile,
			   'sourcetoken=s' => \$tokensource,
			   'grammar=s' => \$grammarfile,
			   'sourcegrammar=s' => \$grammarsource,
			   'version' => \$showversion,
			   'n|dry-run' => \$dry_run,
			   'help|usage' => \$showusage,
			 );

&showusage and exit 1 unless $gotopts;
&showversion if $showversion;
&showusage if $showusage;
exit 0 if $showversion or $showusage;

# valid values for instrs.dat fields
my $valid_regs = join '|', qw(
    REG_AL REG_AH REG_AX REG_EAX
    REG_BL REG_BH REG_BX REG_EBX
    REG_CL REG_CH REG_CX REG_ECX
    REG_DL REG_DH REG_DX REG_EDX
    REG_SI REG_ESI REG_DI REG_EDI
    REG_BP REG_EBP
    REG_CS REG_DS REG_ES REG_FS REG_GS REG_SS
    ONE XMMREG MMXREG segreg CRREG_NOTCR4 CR4 DRREG
    fpureg FPUREG_NOTST0 ST0 ST1 ST2 ST3 ST4 ST5 ST6 ST7 mem imm
    imm8 imm16 imm32 imm64 imm80 imm128
    imm8x imm16x imm32x imm64x imm80x imm128x
    rm8 rm16 rm32 rm1632 rm64 rm80 rm128
    rm8x rm16x rm32x rm1632x rm64x rm80x rm128x
    reg8 reg16 reg32 reg1632 reg64 reg80 reg128
    reg8x reg16x reg32x reg1632x reg64x reg80x reg128x
    mem8 mem16 mem32 mem1632 mem64 mem80 mem128
    mem8x mem16x mem32x mem1632x mem64x mem80x mem128x
    target memfar
);
my $valid_opcodes = join '|', qw(
    [0-9A-F]{2}
    \\$0\\.\\d
);
my $valid_cpus = join '|', qw(
    8086 186 286 386 486 P4 P5 P6
    FPU MMX KATMAI SSE SSE2
    AMD ATHLON 3DNOW
    SMM
    CYRIX
    UNDOC OBS PRIV PROT
    @0 @1
);

# track errors and warnings rather than die'ing on the first.
my (@messages, $errcount, $warncount);
sub die_with_errors (@)
{
    foreach (@_) { print; };
    if ($errcount)
    {
	print "Dying with errors\n";
	exit -1;
    }
}

my ($groups) = &read_instructions ($instrfile);

die_with_errors @messages;

exit 0 if $dry_run; # done with simple verification, so exit

unless ($dry_run)
{
    &output_lex ($tokenfile, $tokensource, $groups);
    &output_yacc ($grammarfile, $grammarsource, $groups);
}

# print version for --version, etc.
sub showversion
{
    print "YASM gen_instr.pl $VERSION\n";
}

# print usage information for --help, etc.
sub showusage
{
    print <<"EOF";
Usage: gen_instrs.pl [-i input] [-t tokenfile] [-g grammarfile]
    -i, --input	         instructions file (default: $instrfile)
    -t, --token	         token output file (default: $tokenfile)
    -st, --sourcetoken   token input file (default: $tokenfile.in)
    -g, --grammar        grammar output file (default: $grammarfile)
    -sg, --sourcegrammar grammar input file (default: $grammarfile.in)
    -v, --version        show version and exit
    -h, --help, --usage  show this message and exit
    -n, --dry-run        verify input file without writing output files
EOF
}

# read in instructions, and verify they're valid (well, mostly)
sub read_instructions ($)
{
    my $instrfile = shift || die;
    open INPUT, "< $instrfile" or die "Cannot open '$instrfile' for reading: $!\n";
    my %instr;
    my %groups;

    sub add_group_rule ($$$$)
    {
	my ($inst, $args, $groups, $instrfile) = splice @_;

	# slide $0.\d down by one.
	# i still say changing instrs.dat would be better ;)
	$args =~ s/\$0\.([1-4])/ '$0.' . ($1-1) /eg;

	# detect relative target format by looking for "target" in args
	if($args =~ m/target/oi)
	{
	    my ($op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu) =
		split /\t+/, $args;
	    eval {
		die "Invalid group name\n"
			if $inst !~ m/^!\w+$/o;
		die "Invalid Operands\n"
			if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
		die "Invalid Address Size\n"
			if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
		die "Invalid Short Opcode\n"
			if $shortopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
		die "Invalid Near Opcode\n"
			if $nearopcode !~ m/^(nil|(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?)$/oi;
		die "Invalid Short CPU\n"
			if $shortcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
		die "Invalid Near CPU\n"
			if $nearcpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
	    };
	    push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
	    die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
	    # knock the ! off of $inst for the groupname
	    $inst = substr $inst, 1;
	    push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $shortopcode, $nearopcode, $shortcpu, $nearcpu];
	} else {
	    my ($op, $size, $opcode, $eff, $imm, $cpu) = split /\t+/, $args;
	    eval {
		die "Invalid group name\n"
			if $inst !~ m/^!\w+$/o;
		die "Invalid Operands\n"
			if $op !~ m/^(nil|((TO|WORD|DWORD)\s)?(?:$valid_regs)([,:](?:$valid_regs)){0,2})$/oi;
		die "Invalid Operation Size\n"
			if $size !~ m/^(nil|16|32|\$0\.\d)$/oi;
		die "Invalid Opcode\n"
			if $opcode !~ m/^(?:$valid_opcodes)(,(?:$valid_opcodes)){0,2}(\+(\$\d|\$0\.\d|\d))?$/oi;
		die "Invalid Effective Address\n"
			if $eff !~ m/^(nil|\$?\d(r?,(\$?\d|\$0.\d)(\+\d)?|i,(nil|16|32)))$/oi;
		die "Invalid Immediate Operand\n"
			if $imm !~ m/^(nil|((\$\d|[0-9A-F]{2}|\$0\.\d),(((8|16|32)s?))?))$/oi;
		die "Invalid CPU\n"
			if $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
	    };
	    push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
	    die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
	    # knock the ! off of $inst for the groupname
	    $inst = substr $inst, 1;
	    push @{$groups->{$inst}{rules}}, [$inst, $op, $size, $opcode, $eff, $imm, $cpu];
	}
    }

    sub add_group_member ($$$$$)
    {
	my ($handle, $fullargs, $groups, $instr, $instrfile) = splice @_;

	my ($inst, $group) = split /!/, $handle;
	my ($args, $cpu) = split /\t+/, $fullargs;
	eval {
	    die "Invalid instruction name\n"
		    if $inst !~ m/^\w+$/o;
	    die "Invalid group name\n"
		    if $group !~ m/^\w+$/o;
	    die "Invalid CPU\n"
		    if $cpu and $cpu !~ m/^(?:$valid_cpus)(?:,(?:$valid_cpus))*$/o;
	    push @messages, "Malformed Instruction at $instrfile line $.: Group $group not yet defined\n"
		    unless exists $groups->{$group};
	    $warncount++;
	};
	push @messages, "Malformed Instruction at $instrfile line $.: $@" and $errcount++ if $@;
	# only allow multiple instances of instructions that aren't of a group
	push @messages, "Multiple Definiton for instruction $inst at $instrfile line $.\n" and $errcount++
		if exists $instr->{$inst} and not exists $groups->{$inst};
	die_with_errors @messages if $errcount and @messages>=TOO_MANY_ERRORS;
	push @{$groups->{$group}{members}}, [$inst, $group, $args, $cpu];
	$instr->{$inst} = 1;
    }

    while (<INPUT>)
    {
	chomp;
	next if /^\s*(?:;.*)$/;

	my ($handle, $args) = split /\t+/, $_, 2;

	# pseudo hack to handle original style instructions (no group)
	if ($handle =~ m/^\w+$/)
	{
	    # TODO: this has some long ranging effects, as the eventual
	    # bison rules get tagged <groupdata> when they don't need
	    # to, etc.  Fix this sometime.
	    add_group_rule ("!$handle", $args, \%groups, $instrfile);
	    add_group_member ("$handle!$handle", "", \%groups, \%instr,
			      $instrfile);
	}
	elsif ($handle =~ m/^!\w+$/)
	{
	    add_group_rule ($handle, $args, \%groups, $instrfile);
	}
	elsif ($handle =~ m/^\w+!\w+$/)
	{
	    add_group_member ($handle, $args, \%groups, \%instr,
			      $instrfile);
	}
	# TODO: consider if this is necessary: Pete?
	# (add_group_member_synonym is -not- implemented)
	#elsif ($handle =~ m/^:\w+$/)
	#{
	#    add_group_member_synonym ($handle, $args);
	#}
    }
    close INPUT;
    return (\%groups);
}

sub output_lex ($@)
{
    my $tokenfile = shift or die;
    my $tokensource = shift;
    $tokensource ||= "$tokenfile.in";
    my $groups = shift or die;

    open IN, "< $tokensource" or die "Cannot open '$tokensource' for reading: $!\n";
    open TOKEN, "> $tokenfile" or die "Cannot open '$tokenfile' for writing: $!\n";
    while (<IN>)
    {
	# Replace token.l.in /* @INSTRUCTIONS@ */ with generated content
	if (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/})
	{
	    foreach my $grp (sort keys %$groups)
	    {
		my %printed;
		my $group = $grp; $group =~ s/^!//;

		foreach my $grp (@{$groups->{$grp}{members}})
		{
		    unless (exists $printed{$grp->[0]})
		    {
			$printed{$grp->[0]} = 1;
			my @groupdata;
			if ($grp->[2])
			{
			    @groupdata = split ",", $grp->[2];
			    for (my $i=0; $i < @groupdata; ++$i)
			    {
				$groupdata[$i] =~ s/nil/0/;
				$groupdata[$i] = " yylval.groupdata[$i] = 0x$groupdata[$i];";
			    }
			    $groupdata[-1] .= "\n\t     ";
			}
			printf TOKEN "%-12s{%s return %-20s }\n",
			    $grp->[0],
			    (join "\n\t     ", @groupdata),
			    "\Ugrp_$group;\E";
			    # TODO: change appropriate GRP_FOO back to
			    # INS_FOO's.  not functionally important;
			    # just pedantically so.
		    }
		}
	    }
	}
	else
	{
	    print TOKEN $_;
	}
    }
    close IN;
    close TOKEN;
}

# helper functions for yacc output
sub rule_header ($ $ $)
{
    my ($rule, $tokens, $count) = splice (@_);
    $count ? "    | $tokens {\n" : "$rule: $tokens {\n";
}
sub rule_footer ()
{
    return "    }\n";
}

sub cond_action_if ( $ $ $ $ $ $ $ )
{
    my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax) = splice (@_);
    return rule_header ($rule, $tokens, $count) . <<"EOF";
        if (\$$regarg == $val) {
            \$\$ = $func(@$a_eax);
        }
EOF
}
sub cond_action_elsif ( $ $ $ $ )
{
    my ($regarg, $val, $func, $a_eax) = splice (@_);
    return <<"EOF";
        else if (\$$regarg == $val) {
            \$\$ = $func(@$a_eax);
        }
EOF
}
sub cond_action_else ( $ $ )
{
    my ($func, $a_args) = splice (@_);
    return <<"EOF" . rule_footer;
        else {
            \$\$ = $func (@$a_args);
        }
EOF
}
sub cond_action ( $ $ $ $ $ $ $ $ )
{
    my ($rule, $tokens, $count, $regarg, $val, $func, $a_eax, $a_args)
     = splice (@_);
    return cond_action_if ($rule, $tokens, $count, $regarg, $val, $func,
	$a_eax) . cond_action_else ($func, $a_args);
}

#sub action ( $ $ $ $ $ )
sub action ( @ $ )
{
    my ($rule, $tokens, $func, $a_args, $count) = splice @_;
    return rule_header ($rule, $tokens, $count)
	. "        \$\$ = $func (@$a_args);\n"
	. rule_footer;
}

sub action_setshiftflag ( @ $ )
{
    my ($rule, $tokens, $func, $a_args, $count) = splice @_;
    return rule_header ($rule, $tokens, $count)
	. "        \$\$ = $func (@$a_args);\n"
	. "        SetInsnShiftFlag(\$\$);\n"
	. rule_footer;
}

sub get_token_number ( $ $ )
{
    my ($tokens, $str) = splice @_;
    $tokens =~ s/$str.*/x/; # hold its place
    my @f = split /\s+/, $tokens;
    return scalar @f;
}

sub output_yacc ($@)
{
    my $grammarfile = shift or die;
    my $grammarsource = shift;
    $grammarsource ||= "$grammarfile.in";
    my $groups = shift or die;

    open IN, "< $grammarsource" or die "Cannot open '$grammarsource' for reading: $!\n";
    open GRAMMAR, "> $grammarfile" or die "Cannot open '$grammarfile' for writing: $!\n";

    while (<IN>)
    {
	if (m{/[*]\s*[@]TOKENS[@]\s*[*]/})
	{
	    my $len = length("%token <groupdata>");
	    print GRAMMAR "%token <groupdata>";
	    foreach my $group (sort keys %$groups)
	    {
		if ($len + length("GRP_$group") < 76)
		{
		    print GRAMMAR " GRP_\U$group\E";
		    $len += length(" GRP_$group");
		}
		else
		{
		    print GRAMMAR "\n%token <groupdata> GRP_\U$group\E";
		    $len = length("%token <groupdata> GRP_$group");
		}
	    }
	    print GRAMMAR "\n";
	}
	elsif (m{/[*]\s*[@]TYPES[@]\s*[*]/})
	{
	    my $len = length("%type <bc>");
	    print GRAMMAR "%type <bc>";
	    foreach my $group (sort keys %$groups)
	    {
		if ($len + length($group) < 76)
		{
		    print GRAMMAR " $group";
		    $len += length(" $group");
		}
		else
		{
		    print GRAMMAR "\n%type <bc> $group";
		    $len = length("%type <bc> $group");
		}
	    }
	    print GRAMMAR "\n";
	}
	elsif (m{/[*]\s*[@]INSTRUCTIONS[@]\s*[*]/})
	{
	    # list every kind of instruction that instrbase can be
	    print GRAMMAR "instrbase:    ",
		    join( "\n    | ", sort keys %$groups), "\n;\n";

	    my ($ONE, $AL, $AX, $EAX);	# need the outer scope
	    my (@XCHG_AX, @XCHG_EAX);

	    # list the arguments and actions (buildbc)
	    #foreach my $instrname (sort keys %$instrlist)
	    foreach my $group (sort keys %$groups)
	    {
		# I'm still convinced this is a hack.  The idea is if
		# within an instruction we see certain versions of the
		# opcodes with ONE, or REG_E?A[LX],imm(8|16|32).  If we
		# do, defer generation of the action, as we may need to
		# fold it into another version with a conditional to
		# generate the more efficient variant of the opcode
		# BUT, if we don't fold it in, we have to generate the
		# original version we would have otherwise.
		($ONE, $AL, $AX, $EAX) = (0, 0, 0, 0);
		# Folding for xchg (REG_E?AX,reg16 and reg16,REG_E?AX).
		(@XCHG_AX, @XCHG_EAX) = ((0, 0), (0, 0));
		my $count = 0;
		foreach my $inst (@{$groups->{$group}{rules}}) {
		    if($inst->[OPERANDS] =~ m/target/oi)
		    {
			# relative target format
			# build the instruction in pieces.

			# rulename = instruction
			my $rule = "$inst->[INST]";

			# tokens it eats: instruction and arguments
			# nil => no arguments
			my $tokens = "\Ugrp_$rule\E";
			$tokens .= " $inst->[OPERANDS]"
			    if $inst->[OPERANDS] ne 'nil';
			$tokens =~ s/,/ ',' /g;
			$tokens =~ s/:/ ':' /g;
			my $func = "bytecode_new_jmprel";

			# Create the argument list for bytecode_new
			my @args;

			# Target argument: HACK: Always assumed to be arg 1.
			push @args, '&$2,';

			# test for short opcode "nil"
			if($inst->[SHORTOPCODE] =~ m/nil/)
			{
			    push @args, '0, 0, 0, 0,';
			}
			else
			{
			    # number of bytes of short opcode
			    push @args, (scalar(()=$inst->[SHORTOPCODE] =~ m/(,)/)+1) . ",";

			    # opcode piece 1 (and 2 and 3 if attached)
			    push @args, $inst->[SHORTOPCODE];
			    $args[-1] =~ s/,/, /;
			    $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
			    # don't match $0.\d in the following rule.
			    $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg;
			    $args[-1] .= ',';

			    # opcode piece 2 (if not attached)
			    push @args, "0," if $inst->[SHORTOPCODE] !~ m/,/o;
			    # opcode piece 3 (if not attached)
			    push @args, "0," if $inst->[SHORTOPCODE] !~ m/,.*,/o;
			}

			# test for near opcode "nil"
			if($inst->[NEAROPCODE] =~ m/nil/)
			{
			    push @args, '0, 0, 0, 0,';
			}
			else
			{
			    # number of bytes of near opcode
			    push @args, (scalar(()=$inst->[NEAROPCODE] =~ m/(,)/)+1) . ",";

			    # opcode piece 1 (and 2 and 3 if attached)
			    push @args, $inst->[NEAROPCODE];
			    $args[-1] =~ s/,/, /;
			    $args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
			    # don't match $0.\d in the following rule.
			    $args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2)/eg;
			    $args[-1] .= ',';

			    # opcode piece 2 (if not attached)
			    push @args, "0," if $inst->[NEAROPCODE] !~ m/,/o;
			    # opcode piece 3 (if not attached)
			    push @args, "0," if $inst->[NEAROPCODE] !~ m/,.*,/o;
			}

			# address size
			push @args, "$inst->[ADSIZE]";
			$args[-1] =~ s/nil/0/;

			# now that we've constructed the arglist, subst $0.\d
			s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);

			# generate the grammar
			print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
		    }
		    else
		    {
			# general instruction format
			# build the instruction in pieces.

			# rulename = instruction
			my $rule = "$inst->[INST]";

			# tokens it eats: instruction and arguments
			# nil => no arguments
			my $tokens = "\Ugrp_$rule\E";
			$tokens .= " $inst->[OPERANDS]"
			    if $inst->[OPERANDS] ne 'nil';
			$tokens =~ s/,/ ',' /g;
			$tokens =~ s/:/ ':' /g;
			# offset args
			my $to = $tokens =~ m/\b(TO|WORD|DWORD)\b/ ? 1 : 0;
			my $func = "bytecode_new_insn";

			# Create the argument list for bytecode_new
			my @args;

			# operand size
			push @args, "$inst->[OPSIZE],";
			$args[-1] =~ s/nil/0/;

			# number of bytes of opcodes
			push @args, (scalar(()=$inst->[OPCODE] =~ m/(,)/)+1) . ",";

			# opcode piece 1 (and 2 and 3 if attached)
			push @args, $inst->[OPCODE];
			$args[-1] =~ s/,/, /;
			$args[-1] =~ s/([0-9A-Fa-f]{2})/0x$1/g;
			# don't match $0.\d in the following rule.
			$args[-1] =~ s/\$(\d+)(?!\.)/"\$" . ($1*2+$to)/eg;
			$args[-1] .= ',';

			# opcode piece 2 (if not attached)
			push @args, "0," if $inst->[OPCODE] !~ m/,/o;
			# opcode piece 3 (if not attached)
			push @args, "0," if $inst->[OPCODE] !~ m/,.*,/o;

			# effective addresses
			push @args, $inst->[EFFADDR];
			$args[-1] =~ s/,/, /;
			$args[-1] =~ s/^nil$/(effaddr *)NULL, 0/;
			$args[-1] =~ s/nil/0/;
			# don't let a $0.\d match slip into the following rules.
			$args[-1] =~ s/\$(\d+)([ri])?(?!\.)/"\$".($1*2+$to).($2||'')/eg;
			#$args[-1] =~ s/(\$\d+[ri]?)(?!\.)/\&$1/; # Just the first!
			$args[-1] =~ s/(\$\d+)r/effaddr_new_reg($1)/;
			$args[-1] =~ s[(\$\d+)i,\s*(\d+)]
			    ["effaddr_new_imm(\&$1, ".($2/8)."), 0"]e;
			$args[-1] .= ',';

			die $args[-1] if $args[-1] =~ m/\d+[ri]/;

			# immediate sources
			push @args, $inst->[IMM];
			$args[-1] =~ s/,/, /;
			$args[-1] =~ s/nil/(immval *)NULL, 0/;
			# don't match $0.\d in the following rules.
			$args[-1] =~ s/\$(\d+)(?!\.)/"\$".($1*2+$to).($2||'')/eg;
			$args[-1] =~ s/(\$\d+)(?!\.)/\&$1/; # Just the first!
			$args[-1] =~ s[^([0-9A-Fa-f]+),]
			    [ConvertIntToImm((immval *)NULL, 0x$1),];
			$args[-1] =~ s[^\$0.(\d+),]
			    [ConvertIntToImm((immval *)NULL, \$1\[$1\]),];

			# divide the second, and only the second, by 8 bits/byte
			$args[-1] =~ s#(,\s*)(\d+)(s)?#$1 . ($2/8)#eg;
			$args[-1] .= ($3||'') eq 's' ? ', 1' : ', 0';

			die $args[-1] if $args[-1] =~ m/\d+s/;

			# now that we've constructed the arglist, subst $0.\d
			s/\$0\.(\d+)/\$1\[$1\]/g foreach (@args);

			# see if we match one of the cases to defer
			if (($inst->[OPERANDS]||"") =~ m/,ONE/)
			{
			    $ONE = [ $rule, $tokens, $func, \@args];
			}
			elsif (($inst->[OPERANDS]||"") =~ m/REG_AL,imm8/)
			{
			    $AL = [ $rule, $tokens, $func, \@args];
			}
			elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,imm16/)
			{
			    $AX = [ $rule, $tokens, $func, \@args];
			}
			elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,imm32/)
			{
			    $EAX = [ $rule, $tokens, $func, \@args];
			}
			elsif (($inst->[OPERANDS]||"") =~ m/REG_AX,reg16/)
			{
			    $XCHG_AX[0] = [ $rule, $tokens, $func, \@args];
			}
			elsif (($inst->[OPERANDS]||"") =~ m/reg16,REG_AX/)
			{
			    $XCHG_AX[1] = [ $rule, $tokens, $func, \@args];
			}
			elsif (($inst->[OPERANDS]||"") =~ m/REG_EAX,reg32/)
			{
			    $XCHG_EAX[0] = [ $rule, $tokens, $func, \@args];
			}
			elsif (($inst->[OPERANDS]||"") =~ m/reg32,REG_EAX/)
			{
			    $XCHG_EAX[1] = [ $rule, $tokens, $func, \@args];
			}

			# or if we've deferred and we match the folding version
			elsif ($ONE and ($inst->[OPERANDS]||"") =~ m/imm8/)
			{
			    $ONE->[4] = 1;
			    # Output a normal version except imm8 -> imm8x
			    # (BYTE override always makes longer version, and
			    # we don't want to conflict with the imm version
			    # we output right after this one.
			    $tokens =~ s/imm8/imm8x/;
			    print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);

			    # Now output imm version, with second opcode byte
			    # set to ,1 opcode.  Also call SetInsnShiftFlag().
			    $tokens =~ s/imm8x/imm/;
			    die "no space for ONE?" if $args[3] !~ m/0,/;
			    $args[3] = $ONE->[3]->[2];
			    print GRAMMAR action_setshiftflag ($rule, $tokens, $func, \@args, $count++);
			}
			elsif ($AL and ($inst->[OPERANDS]||"") =~ m/reg8,imm/)
			{
			    $AL->[4] = 1;
			    my $regarg = get_token_number ($tokens, "reg8");

			    print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AL->[3], \@args);
			}
			elsif ($AX and ($inst->[OPERANDS]||"") =~ m/reg16,imm/)
			{
			    $AX->[4] = 1;
			    my $regarg = get_token_number ($tokens, "reg16");

			    print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $AX->[3], \@args);
			}
			elsif ($EAX and ($inst->[OPERANDS]||"") =~ m/reg32,imm/)
			{
			    $EAX->[4] = 1;
			    my $regarg = get_token_number ($tokens, "reg32");

			    print GRAMMAR cond_action ($rule, $tokens, $count++, $regarg, 0, $func, $EAX->[3], \@args);
			}
			elsif (($XCHG_AX[0] or $XCHG_AX[1]) and
			    ($inst->[OPERANDS]||"") =~ m/reg16,reg16/)
			{
			    my $first = 1;
			    for (my $i=0; $i < @XCHG_AX; ++$i)
			    {
				if($XCHG_AX[$i])
				{
				    $XCHG_AX[$i]->[4] = 1;
				    # This is definitely a hack.  The "right"
				    # way to do this would be to enhance
				    # get_token_number to get the nth reg16
				    # instead of always getting the first.
				    my $regarg =
					get_token_number ($tokens, "reg16")
					+ $i*2;

				    if ($first)
				    {
					print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_AX[$i]->[3]);
					$first = 0;
				    }
				    else
				    {
					$count++;
					print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_AX[$i]->[3]);
				    }
				}
			    }
			    print GRAMMAR cond_action_else ($func, \@args);
			}
			elsif (($XCHG_EAX[0] or $XCHG_EAX[1]) and
			    ($inst->[OPERANDS]||"") =~ m/reg32,reg32/)
			{
			    my $first = 1;
			    for (my $i=0; $i < @XCHG_EAX; ++$i)
			    {
				if($XCHG_EAX[$i])
				{
				    $XCHG_EAX[$i]->[4] = 1;
				    # This is definitely a hack.  The "right"
				    # way to do this would be to enhance
				    # get_token_number to get the nth reg32
				    # instead of always getting the first.
				    my $regarg =
					get_token_number ($tokens, "reg32")
					+ $i*2;

				    if ($first)
				    {
					print GRAMMAR cond_action_if ($rule, $tokens, $count++, $regarg, 0, $func, $XCHG_EAX[$i]->[3]);
					$first = 0;
				    }
				    else
				    {
					$count++;
					print GRAMMAR cond_action_elsif ($regarg, 0, $func, $XCHG_EAX[$i]->[3]);
				    }
				}
			    }
			    print GRAMMAR cond_action_else ($func, \@args);
			}

			# otherwise, generate the normal version
			else
			{
			    print GRAMMAR action ($rule, $tokens, $func, \@args, $count++);
			}
		    }
		}

		# catch deferreds that haven't been folded in.
		if ($ONE and not $ONE->[4])
		{
		    print GRAMMAR action (@$ONE, $count++);
		}
		if ($AL and not $AL->[4])
		{
		    print GRAMMAR action (@$AL, $count++);
		}
		if ($AX and not $AL->[4])
		{
		    print GRAMMAR action (@$AX, $count++);
		}
		if ($EAX and not $AL->[4])
		{
		    print GRAMMAR action (@$EAX, $count++);
		}

		# print error action
		# ASSUMES: at least one previous action exists
		print GRAMMAR "    | \Ugrp_$group\E error {\n";
		print GRAMMAR "        Error (_(\"expression syntax error\"));\n";
		print GRAMMAR "    }\n";

		# terminate the rule
		print GRAMMAR ";\n";
	    }
	}
	else
	{
	    print GRAMMAR $_;
	}
    }
    close IN;
    close GRAMMAR;
}