Code for converting Windows extended-ASCII (Windows-1252) text, e.g. a file saved as .txt from MS Word, to plain ASCII punctuation:
# Read a Windows-1252 ("extended ASCII") text file, e.g. one saved as .txt from
# MS Word, and replace Word's typographic punctuation with plain ASCII.
#
# The :encoding(Windows-1252) layer decodes each input byte to its Unicode
# code point (for example byte 0x92, Word's apostrophe, becomes U+2019), so the
# substitutions below match on code points rather than on raw bytes.
#
# Expects $textFile to hold the path of the file to read.
open(my $text_fh, "<:encoding(Windows-1252)", $textFile)
    or die("Cannot read ".$textFile.": $!");

while (my $inp = <$text_fh>) {
    # MS Word apostrophes / single quotes -> ASCII apostrophe.
    # A hexdump of the input shows bytes 0x91/0x92; these decode to
    # U+2018/U+2019 (E2 80 98 / E2 80 99 when encoded as UTF-8).
    $inp =~ s/[\x{2018}\x{2019}]/'/g;

    # MS Word ellipsis -> three ASCII periods.
    # Input byte 0x85 decodes to U+2026 (E2 80 A6 when encoded as UTF-8).
    $inp =~ s/\x{2026}/.../g;

    # MS Word double quotes -> ASCII double quote.
    # Input bytes 0x93/0x94 decode to U+201C/U+201D
    # (E2 80 9C / E2 80 9D when encoded as UTF-8).
    $inp =~ s/[\x{201C}\x{201D}]/"/g;

    # rest of code
}

close $text_fh;