#!/usr/bin/perl -w

# char2ent.pl
# 
# Simple utility to convert files with &#ddd; to/from 8bit chars
# See usage at end of this file ( or ./char2ent -h )
# Note: handles only 8-bit characters; wide encodings such as UTF-16 are not supported
#
# mode=html (default)
#   Convert 8bit chars (with high bit set) to html entity &#ddd;
#
# mode=work:
#   Convert html entities &#ddd; to the corresponding 8bit char
#
# Christophe Chisogne <christophe@publicityweb.com>

use Getopt::Long;
use strict;

# Program identity, shown by --version and in the usage text.
my $PROG = 'char2ent';		# prog name to display
my $VERSION = '0.01';
my $DATE = '2003/11/07';
my $BACK = 'bak';		# extension for backup files

# Command line options. $mode keeps its default of 'html' unless --mode is given.
my ($mode, $backup, $confirm, $version, $help); # vars from CLI options
$mode = 'html';
my $resopt = GetOptions('version|v' => \$version,
	'help|h' => \$help,
	'mode=s' => \$mode,
	'backup|b' => \$backup,
	'confirm|c' => \$confirm,
	)
or usage();

version() if defined $version;

# At least one file name is required; several may be given and each one is
# converted in turn by the main loop.
usage() if defined($help) || @ARGV < 1;

# Pick the per-line converter. The mode name is compared case-insensitively
# and must match as a whole, so a typo such as "nothtml" is rejected with the
# usage text instead of being silently treated as "html".
my $conv;
if (lc($mode) eq 'html') {
	print "Conversion from 8bit chars to &#ddd; entities\n";
	$conv = \&char2ent;
} elsif (lc($mode) eq 'work') {
	print "Conversion from &#ddd; entities to 8bit chars\n";
	$conv = \&ent2char;
} else {
	usage();
}

# Convert every file named on the command line. With --confirm the user is
# asked before each file; the default answer (empty line) is "yes".
my $ok = 'y';
foreach my $filename (@ARGV) {
	if (defined $confirm) {
		print "Convert file [$filename]? [Yn] ";
		$ok = <STDIN>;
		# <STDIN> yields undef at end of input; treat that like an empty
		# answer so the match below does not warn about an undefined value.
		$ok = '' unless defined $ok;
	}
	# Only an answer that starts with "n" declines; an "n" elsewhere in the
	# reply (as in "yes, convert") must not count as a refusal.
	unless ($ok =~ /^\s*n/i) {
		print "Converting file [$filename]...\n";
		convertfile($filename);
	}
}
exit 0;

# convertfile($filename)
#
# Rewrite $filename in place, passing each line through the converter held
# in $conv. The converted text is first written to a temporary file named
# "$filename.<pid>", which is then renamed over the original. When $backup
# is set, the original is renamed to "$filename.$BACK" beforehand.
#
# Dies if a file cannot be opened, written, closed or renamed. The temporary
# file is removed when writing it or making the backup fails, so the original
# file is never replaced by incomplete output.
sub convertfile {
	my $filename = shift;
	my $tmpname = "$filename.$$";

	# Three-argument open with lexical handles: the name is always taken as
	# a plain file name, never as a mode prefix or a pipe.
	open my $in, '<', $filename or die "Cant open $filename: $!\n";
	open my $out, '>', $tmpname or die "Cant write $tmpname: $!\n";

	my $error;
	while (defined(my $line = <$in>)) {
		next if print {$out} $conv->($line);
		$error = "$!";
		last;
	}
	close $in;

	# Buffered output is flushed at close, so a write failure (for example
	# a full disk) may only become visible here.
	unless (close $out) {
		$error = "$!" unless defined $error;
	}
	if (defined $error) {
		unlink $tmpname;
		die "Cant write $tmpname: $error\n";
	}

	if ($backup) {
		unless (rename $filename, "$filename.$BACK") {
			my $why = "$!";
			unlink $tmpname;
			die "Cant backup $filename.$BACK: $why\n";
		}
	}
	rename($tmpname, $filename)
		or die "Cant write $filename from $tmpname: $!\n";
}

# $line2 = char2ent($line)
#
# Return a copy of $line in which every character with a code above 127 is
# replaced by its decimal entity "&#<code>;". Characters in the 7-bit range
# (including the line terminator) are passed through unchanged.
sub char2ent {
	my ($text) = @_;
	# The negated class selects exactly the characters outside 0..127.
	$text =~ s{([^\x00-\x7F])}{sprintf('&#%d;', ord($1))}ge;
	return $text;
}

# $line2 = ent2char($line)
#
# Return a copy of $line in which every three-digit decimal entity "&#ddd;"
# with a value from 000 to 255 is replaced by the character with that code.
#
# Three-digit entities above 255 are left untouched: chr() would turn them
# into characters that do not fit in 8 bits. Entities with fewer or more than
# three digits are not matched at all.
sub ent2char {
	my $line = shift;
	# [0-9] rather than \d so that only ASCII digits are accepted.
	$line =~ s/&#([0-9]{3});/$1 <= 255 ? chr($1) : "&#$1;"/ge;
	return $line;
}

# version()
#
# Print the program name, version and date, a one-line description and the
# usage text, then exit with status 0.
sub version {
	print "$PROG v$VERSION, $DATE\n\n";
	print "Convert files with 8bit chars to/from &#ddd; entities\n";
	print "\n";	
	# usage() ends the process itself, so the success status has to be
	# handed to it; nothing after this call would ever run.
	usage(0);
}

# usage([$status])
#
# Print the command line synopsis and exit. $status is the process exit
# status; it defaults to 1, which is what callers reporting a command line
# error get when they pass no argument.
sub usage {
	my $status = @_ ? shift : 1;
	print <<EOF;
Usage:
$PROG [--mode=html|work] [-b] [-c] 8bitfile.txt ...
$PROG [--help] [--version]

--mode=x,  -m=x   choose html mode (default) or work mode
--backup,  -b     backup of modified file
--confirm, -c     confirm conversion of each file
EOF
	exit $status;
}

