#!/usr/bin/perl -w
    eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
         if 0;
$0 = 'od';

# (p)od - dump files in octal and other formats
# History:  Aug 13 2000: Original version.

use strict;
use integer;
use locale;

=head1 NAME

od - dump files in octal and other formats

=head1 SYNOPSIS

Z<> Z<> Z<> Z<>B<od> [I<option>...] [I<file>...]

=head1 DESCRIPTION

B<od> reads the input concatenated from all I<file>s specified on the
command line, or standard input if no file arguments are given, and
prints file contents in a user defined format. `C<->' may be used to
denote standard input.

Every block of 16 bytes of input data is formatted into lines according
to each of the specified formats. The first output line resulting from
a block is prefixed with an offset indicating the position of the first
byte of the printed data with respect to the beginning of the input
(counting across all input files). Runs of blocks not differing from a 
preceding block (except for the offset) are shown as a single asterisk
(`C<*>').

=cut


######
# Some useful numbers
#
my $b =  512;
my $k = 1024;
my $m = $k*$k;
my %Mult = ( b => $b, k => 1024, m => $m );

######
# character formatting
#
my @aTab = (
  ' nul', ' soh', ' stx', ' etx', ' eot', ' enq', ' ack', ' bel',
  '  bs', '  ht', '  nl', '  vt', '  ff', '  cr', '  so', '  si',
  ' dle', ' dc1', ' dc2', ' dc3', ' dc4', ' nak', ' syn', ' etb',
  ' can', '  em', ' sub', ' esc', '  fs', '  gs', '  rs', '  us',
  '  sp'
);
$aTab[127] = ' del';
for my $ic ( ord(' ')+1..126 ){
    my $c = chr($ic);
    $aTab[$ic] = '   ' . $c;
}

# char_a - translation according to -ta
#
sub char_a($){
    my $s = shift;
    my $res = '';
    for my $n ( unpack( "C" . length($s), $s ) ){
	$res .= $aTab[$n&0x7f];
    }
    return $res;
}

# char_c - translation according to -tc
#
my @cTab;
for my $ic ( 0..0xff ){
    my $c = chr($ic);
    my $res;
    if( $c !~ /[[:print:]]/ ){
	if( $c eq "\n" ){
	    $res = '  \\n';
	} elsif( $c eq "\t" ){
	    $res = '  \\t';
	} elsif( $c eq "\r" ){
	    $res = '  \\r';
	} elsif( $c eq "\f" ){
	    $res = '  \\f';
	} elsif( $c eq chr(0) ){
	    $res = '  \\0';
	} elsif( $c eq "\a" ){
	    $res = '  \\a';
	} elsif( $c eq "\b" ){
	    $res = '  \\b';
	} elsif( $c eq chr(013) ){
	    $res = '  \\v';
	} else {
	    $res = sprintf( " %03o", ord( $c ) );
	}
    } else {
	$res = '   ' . $c;
    }
    $cTab[$ic] = $res;
}

sub char_c($){
    my $s = shift();
    my $res = '';
    my @a = unpack( "C" . length($s), $s );
    for my $n ( @a ){
	$res .= $cTab[$n];
    }
    return $res;
}


# mkprint - replace non-printing chars by '.'
# 
sub mkprint($){
    my $s = shift;
    $s =~ s/[^[:print:]]/./g;
    return $s;
}

######
# number formatting
#
my( %IntSize, %FloatSize );
for my $c ( qw{ C S I L } ){
   $IntSize{$c} = length( pack( $c, 42 ) );
}
for my $c ( qw{ f d }  ){
   $FloatSize{$c} = length( pack( lc($c), 42 ) );
}
my %IntFmt = (
   o => { 1 => ' %03o', 2 => ' %06o', 4 => ' %011o', 8 => ' %022o' },
   d => { 1 =>  ' %4d', 2 =>  ' %6d', 4 =>  ' %11d', 8 =>  ' %20d' },
   u => { 1 =>  ' %3u', 2 =>  ' %5u', 4 =>  ' %10u', 8 =>  ' %20u' },
   x => { 1 => ' %02x', 2 => ' %04x', 4 =>  ' %08x', 8 => ' %016x' },
);
my %FloatFmt = (
   f => ' %14.6e',
   d => ' %23.15e',
);

sub intcode($){
    my $len = shift();
    for my $key ( keys( %IntSize ) ){
	return $key if $len == $IntSize{$key};
    }
    return undef();
}

sub floatcode($){
    my $len = shift();
    for my $key ( keys( %FloatSize ) ){
	return $key if $len == $FloatSize{$key};
    }
    return undef();
}

######
# error handling
#
my $Errors    = 0;
sub Warn($){
    warn( "$0: $_[0]\n" );
    $Errors++;
}
sub Die($){
    die( "$0: $_[0]\n" );
}

######
# binary input stream
#
my $Blocksize = 8 * $k;
my $EndOfFile = 0;
my $theFile;

sub openARGV(){
    unshift( @ARGV, '-' ) unless @ARGV;
    $theFile = shift( @ARGV );
    open( ARG, "<$theFile" )
    || Die( "can't open $theFile for reading ($!)\n" );
    binmode( ARG ) unless $theFile eq '-';
}

sub readARGV(\$$){
    my( $argref, $len ) = @_;
    my $offset = 0;
    return $offset if $EndOfFile;
    for(;;){
	my $read = read( ARG, $$argref, $len, $offset );
	if( defined( $read ) ){
	    $offset += $read;
	    $len    -= $read;
	    return $offset if $len == 0;
	    # end of file
	    close( ARG );
	    for(;;){
		if( ! @ARGV ){
		    $EndOfFile = 1;  
		    return $offset;
		}
		$theFile = shift( @ARGV );
		last if open( ARG, "<$theFile" );
		Warn( "can't open $theFile for reading ($!)" );
	    }
	} else {
	    Die( "$theFile: error reading ($!)" );
	}
    }
}

# seekARGV
#
sub seekARGV($$){
    my( $pos, $whence ) = @_;

    # on a plain file, get the size via -s()
    #
    while( $pos ){
	if( -f ARG ){
	    my $size = -s _;
	    if( $pos >= $size ){
		# position exceeds file size: 
		$pos -= $size;
		close( ARG );
		for(;;){
		    return 0 unless @ARGV;
		    $theFile = shift( @ARGV );
		    last if open( ARG, "<$theFile" );
		    Warn( "can't open $theFile for reading ($!)" );
		}
		next;
	    } else {
		return 1 if seek( ARG, $pos, $whence );
                last;
	    }
	}
    }

    # seek/Seek failed. ( Is it `-'? ) Try seek by read.
    while( $pos ){
	my $buf;
	my $want = $pos > $Blocksize ? $Blocksize : $pos;
	my $read = readARGV( $buf, $want );
	return 0 unless $read == $want;
	$pos -= $read;
    }
}

my $Buffer = '';
my $BufLen = 0;  # BufLen - BufPtr  ... remaining bytes
my $BufPtr = 0;  # BufPtr >= BufLen ... all data has been read

sub getData(\$$){
    my( $strptr, $reqlen ) = @_;
    my $putlen = 0;
    my $nb;
    while( $reqlen > 0 ){
	if( $BufPtr >= $BufLen ){
            $BufLen = readARGV( $Buffer, $Blocksize );
	    $BufPtr = 0;
	    return $putlen if $BufLen == 0;
	}
	if( $BufLen - $BufPtr >= $reqlen ){
	    $$strptr = substr( $Buffer, $BufPtr, $reqlen );
            $nb = $reqlen;
	} else { # $BufPtr < $BufLen
	    $$strptr = substr( $Buffer, $BufPtr );
            $nb = $BufLen - $BufPtr;
	}
	$BufPtr += $nb;
	$putlen += $nb;
	$reqlen -= $nb;
    }
    return $putlen;
}


######
# lcm - least common multiple
#
sub lcm(@){
    my %mf;
    for my $n ( @_ ){
	my %pf;
	for( my $i = 2; $i <= $n; $i++ ){
	    while( $n % $i == 0 ){
		$n /= $i;
		exists( $pf{$i} ) ? ( $pf{$i} *= $i ) : ( $pf{$i} = $i );
	    }
	}
	for my $f ( keys( %pf ) ){
	    if( ! exists( $mf{$f} ) || $pf{$f} > $mf{$f} ){
		$mf{$f} = $pf{$f};
	    }
	}
    }
    my $l = 1;
    for my $f ( keys( %mf ) ){ $l *= $mf{$f}; }
    return $l;
}

=head1 OPTIONS

=over 4

=item B<-A> I<radix>

Specify the base for printing data offsets. Use `d' for decimal,
`o' for octal, `x' for hexadecimal, and `n' to suppress printing the
offset. Default is `o' (octal).

=item B<-j> I<bytes>

Skip the given number of input bytes before printing output.
The number can be decimal, octal (after a leading `0') or hexadecimal
(after a leading `0x' or `0X'). Appending the character `b', `k' or `m'
to the number will cause it to be interpreted as a multiple of 512, 1024
or 1048576 bytes, respectively.  A trailing `b' in a hexadecimal
number is interpreted as a hexadecimal number.

=item B<-N> I<bytes>

Format at most the indicated number of bytes. Prefixes and suffixes
are interpreted as with the B<-j> option argument.

=item B<-t> [I<type>]

Specify one or more output formats. The option may be used repeatedly,
or a I<type> may consist of more than one format specification. A
format specification consists of one of the letters `a', `c', `d',
`f', `o', `u', `x', specifying named character, character, signed decimal,
floating point, octal, unsigned decimal and hexadecimal, respectively.
The letters `d', `f', `o', `u' and `x' can be followed by an optional
unsigned decimal integer that specifies the number of bytes to be
transformed by each instance of the output type. Alternatively, the type
specification character `f' can be followed by an optional `F', `D' or 'L',
indicating that the conversion should be applied to an item of type float,
double or long double, respectively; and the letters `d', `o', `u' and `x'
can be followed by an optional `C', `S', `I' or `L' indicating that the
conversion should be applied to an item of type char, short, int or long,
respectively. The default length specifyer is `I' for integral formats
and `D' for the float format.

If the letter `z' is added to any format, a display of the printable
characters (with non-printables replaced by `.') of the data block
is appended to the formatted data.

If no other format is specified, the format defined by B<-to2> is used.

Note that format type `a' uses only the least significant seven bits
of each byte.

=item B<-v>

Produce output for each input block, i.e. do not suppress identical output.

=item B<-w>[I<width>]

Use the given number of bytes (or 32, if I<width> is omitted) as
block size for each formatted output line.

=back

=cut

sub usage(){
    print STDERR <<'[TheEnd]';
Usage: od [option]... [file]...
   -A o|d|x|n  print offset in octal, decimal, hexadecimal; or no offset
   -j bytes    skip specified number of bytes before printing output
   -N bytes    stop printing after specified number of bytes
   -t type     output format: a|c, f[<int>|F|D|L], {d|o|u|x}[<int>|C|S|I|L]
   -v          print all data, i.e. do not use `*' to mark repeating lines
   -w [width]  print specified number of bytes per output line
[TheEnd]
    exit( $Errors );
}

# defaults
#
my %Opt = (
   A => 'o',
   j => 0,
   N => -1,
   t => [],
   v => 0,
   w => 16
);

sub crackType($){
    my( $type ) = @_;
    my( $fmt, $len, $cod, $str, $c );
    $str = $type =~ s/z$// ? 1 : 0;
    if( $type eq 'a' || $type eq 'c' ){
	$fmt = $type;
        $len = 1;
	$cod = 'C';
    } elsif( $type =~ s/f// ){
	$fmt = 'f';
        $type = 'D' if $type eq '';
	if( $type =~ /[FD]/ ){
	    $cod = lc( $type );
 	    $len = $FloatSize{$cod};
	} else {
	    # numeric - must be a valid length
	    $len = $type;
	    $cod = floatcode( $len );
	    if( ! defined( $cod ) ){
		Warn( "no $len-byte floating point type" );
		exit( 1 );
	    }
	}
    } elsif( $type =~ s/^([doux])// ){
	$fmt = $1;
        $type = 'I' if $type eq '';
	if( $type =~ /[CSIL]/ ){
	    $cod = $type;
 	    $len = $IntSize{$cod};
	} else {
	    # numeric - must be a valid length
	    $len = $type;
	    $cod = intcode( $len );
	    if( ! defined( $cod ) ){
		Warn( "no $len-byte integral type" );
		exit( 1 );
	    }
	}
    }
    return ( $fmt, $len, $cod, $str );
}

###
# Parse options
#
while( @ARGV && $ARGV[0] =~ /^-(.)(.*)$/ ){
    my $opt = $1;
    my $arg = $2;
    shift( @ARGV );
    if(      $opt =~ /[AjNt]/ ){
        if( length( $arg ) == 0 ){
	    if( @ARGV ){
		$arg =  shift( @ARGV ); 
	    } else {
		Warn( "option `-$opt' requires an argument" );
		usage();
	    }
        }
	if(      $opt eq 'A' ){
	    if( $arg =~ /[odxn]/ ){
		$Opt{$opt} = $arg;
	    } else {
		Warn( "option `-A' requires as argument one of `o', `d', `x', `n'" );
		usage();
	    }
	} elsif( $opt eq 'j' || $opt eq 'N' ){
	    if( $arg =~ s/^([1-9][0-9]*|0x[0-9a-f]+|0[0-7]*)([bkm]?)$//i ){
		my $number = $1;
		my $faktor = $2 ne '' ? $Mult{$2} : 1;
		$number = oct( lc($number) ) if $number =~ /^0/;
		$Opt{$opt} = $number * $faktor;
	    } else {
		Warn( "option `-$opt' requires an integer argument" );
		usage();
	    }
	} elsif( $opt eq 't' ){
	    while( $arg =~
		   s/^((?:[ac]|[doux](?:\d+|[CSIL])?|f(?:\d+|[FD])?)z?)// ){
		my( $fmt, $len, $cod, $chr ) = crackType( $1 );
		push( @{$Opt{$opt}}, [ $fmt, $len, $cod, $chr ] );
	    }
	    if( length( $arg ) ){
		Warn( "invalid argument for option `-t'" );
		usage();
	    }
	} 
	next;

    } elsif( $opt eq 'w' ){
	if( $arg =~ s/^(\d+)// ){
	    $Opt{w} = $1;
	    if( length( $arg ) ){
		Warn( "invalid character following `-w$1$arg'" );
		usage();
	    }
        } else {
	    $Opt{w} = 32;
	}

    } elsif( $opt eq '-' && $arg eq '' ){
	last;
    } elsif( $opt eq 'h' || $opt eq '?' ){
        usage();
        exit( 0 );
    } else {
        Warn( "illegal option `$opt'" );
        usage();
        exit( 1 );
    }

    if( length( $arg ) ){
	unshift( @ARGV, "-$arg" );
    }
}

# default for -t option: -to2  
#
push( @{$Opt{t}}, [ 'o', 2, intcode(2), 0 ] ) unless @{$Opt{t}};

# check -w option: multiple of least common multiple
#
my @lens;
for my $fmtref ( @{$Opt{t}} ){
    push( @lens, $fmtref->[1] );
}
my $lcm = lcm( @lens );
if( $Opt{w} % $lcm != 0 || $Opt{w} == 0 ){
    my $w = $Opt{w} / $lcm * $lcm;
    $w = $lcm if $w == 0;
    Warn( "invalid width $Opt{w} (option `-w'), using $w" );
    $Opt{w} = $w;
}

if( 0 ){
for my $opt ( sort keys( %Opt ) ){
    print "-$opt:";
    if( $opt eq 't' ){
	for my $fmtref ( @{$Opt{t}} ){
	    print " $fmtref->[0]$fmtref->[1]:$fmtref->[2]",
	          $fmtref->[3] ? 'z' : '';
	}
	print "\n";
    } else {
	print " $Opt{$opt}\n";
    }
}
}

##############
#### MAIN ####
##############

# create print routine
# assume: $theOffset, $theData, $theTemp;
#
my $Code = '';
my $Last = '';

#-->>>
$Code .= <<'TheEnd';
sub Run($$$$){
    my( $theOffset, $theWidth, $theLimit, $theLCM ) = @_;
    my( $oldData, $inSame, $theData, $got, $outOffset );

    $oldData = '';
    $inSame = 0;
TheEnd
#--<<<

    if( $Opt{N} >= 0 ){
#-->>>
	$Code .= <<'TheEnd';
    my $toGet;
    while ( ( $toGet = $theLimit >= $theOffset + $theWidth
                       ? $theWidth
                       : $theLimit - $theOffset ),
	    $toGet > 0 &&
	    ( $got = readARGV( $theData, $toGet ) ) == $theWidth ){
TheEnd
#--<<<
    } else {
#-->>>
	$Code .= <<'TheEnd';
    while ( ( $got = readARGV( $theData, $theWidth ) ) == $theWidth ){
TheEnd
#--<<<
    }

    if( ! $Opt{v} ){
#-->>>
$Code .= <<'TheEnd';
	if( $oldData eq $theData ){
            print "*\n" unless $inSame;
	    $inSame = 1;
	    next;
	}
        $inSame = 0;
	$oldData = $theData;
TheEnd
#--<<<
    }

my $offset  = 1;
my $trailoff = '';
my @DumpLine;
for my $fmtref ( @{$Opt{t}} ){
    my $template;
    my $valcol = '';
    my $fmtstr = '';
    my $lastoff  = '""';
    my $lastval  = '';

    my $optA = $Opt{A};
    if( $optA ne 'n' ){
	if( $offset ){
	    $offset = 0;
	    if(      $optA eq 'o' ){
		$fmtstr .= '%07o';
		$valcol .= ', $theOffset';
	    } elsif( $optA eq 'd' ){
		$fmtstr .= '%07d';
		$valcol .= ', $theOffset';
	    } elsif( $optA eq 'x' ){
		$fmtstr .= '%06x';
		$valcol .= ', $theOffset';
	    }
	    $lastoff  .= '."' . $fmtstr . '"';
	    $lastval  = $valcol;
	    $trailoff = 'printf( "' . $fmtstr . '\n"' . $valcol . ' );';

	} else {
	    if(      $optA eq 'o' || $optA eq 'd' ){
		$fmtstr .= '       ';
	    } elsif( $optA eq 'x' ){
		$fmtstr .= '      ';
	    }
	    $lastoff  .= '."' . $fmtstr . '"';
	}
    }

    my( $fmt, $len, $cod, $str ) = @$fmtref;
    my $vpl = $Opt{w}/$len;
    if(      $fmt =~ /[ac]/ ){
	my $h = 4*$vpl;
	$fmtstr  .= "%-${h}s";
	$template = '';
	$valcol  .= ', char_' . $fmt . '( $theData )';
	# incomplete line
	$lastoff .= '."' . "%-${h}s" .'"';
        $lastval .= ', char_' . $fmt . '( $theData )';
    } elsif( $fmt =~ /[dfoux]/ ){
	my $h = $fmt eq 'f' ? $FloatFmt{$cod} : $IntFmt{$fmt}{$len};
	$fmtstr .= $h x $vpl;
	$template = ( $fmt eq 'd' ? lc($cod) : $cod ) x $vpl;
	$valcol .= ', unpack( "' . $template . '", $theData )';
	# incomplete line
	$lastoff .= '.("' . $h . '"x(length($theData)/' . $len . '))';
	$lastval .= ', unpack( "' . $template . '", $theData )';
    }

    if( $str ){
	$fmtstr .= '  >%s<';
	$valcol .= ', mkprint( $theData )';
	# incomplete line
	$lastoff .= '."  >%s<"';
	$lastval .= ', mkprint( $theData )';
    }
    $lastoff .= '."\n"';

#-->>>
    $Code .= '        printf( "' . $fmtstr . '\\n"' . $valcol . " );\n";
    $Last .= "        printf( $lastoff $lastval );\n";
#--<<<
}

#-->>>
$Code .= <<'TheEnd';
    } continue {
        $theOffset += $theWidth;
        $got = 0;
    }

    if( $got ){
	if( $got % $theLCM ){
	    $theData .= "\0" x ($theLCM - $got % $theLCM);
	}
TheEnd
#--<<<
$Code .= $Last;

#-->>>
$Code .= <<TheEnd;
    }
    \$theOffset += \$got;
    $trailoff
}
TheEnd
#--<<<

print $Code;
# evaluate and check for errors
#
eval $Code;
if( $@ ){
    print STDERR "Code:\n$Code";
    Die( "internal error - generated incorrect Perl code: $@\n" );
}

# execute
#
openARGV();
if( $Opt{j} ){
    Die( 'cannot skip past end of combined input' )
      unless seekARGV( $Opt{j}, 0 );
}
&Run( $Opt{j}, $Opt{w}, $Opt{j}+$Opt{N}, $lcm );

exit( 0 );


=head1 ENVIRONMENT

No environment variables are used.

=head1 BUGS

The format type fL (long double) is not implemented since there is no
way of handling long doubles in Perl.

Multibyte character dumping in connection with B<-tc> is not implemented.

=head1 STANDARDS

This B<od> implementation follows the the IEEE Std1003.2-1992 ("POSIX.2")
definition of B<od>, with the exception of the "traditional" syntax
C<od [-bcdosx][file] [[+]offset[.][b]]> not being implemented.

Extensions are: the B<-w> option, and the `z' suffix to format types.

=head1 AUTHOR

This Perl implementation of I<od> was written by Wolfgang Laun,
I<Wolfgang.Laun@gmail.com>.

=head1 COPYRIGHT and LICENSE

This program is free and open software. You may use, modify,
distribute, and sell this program (and any modified variants) in any
way you wish, provided you do not restrict others from doing the same.

=cut

