# -*-Perl-*-
################################################################
###
### EncDec.pm
###
### Author: Internet Message Group
### Created: Apr 23, 1997
### Revised: Jul 4, 2004
###
my $PM_VERSION = "IM::EncDec.pm version 20031028(IM146)";
package IM::EncDec;
require 5.003;
require Exporter;
use integer;
use strict;
use vars qw(@ISA @EXPORT);
@ISA = qw(Exporter);
@EXPORT = qw(mime_encode_string mime_decode_string
b_encode_string b_decode_string
q_encode_string q_decode_string);
use vars qw(@D2H
$mime_encode_switch
$mime_decode_switch
@Base64a %Base64b
@koi_iso);
##################################################
##
## Variables
##
@D2H = ('0'..'9', 'A'..'F');
##################################################
##
## Switches
##
$mime_encode_switch = {
B => \&b_encode_string,
Q => \&q_encode_string,
};
$mime_decode_switch = {
B => \&b_decode_string,
Q => \&q_decode_string,
};
##################################################
##
## String Encoder/Decoder
##
sub mime_encode_string($$$) {
my($str, $iso7, $iso8) = (@_);
my($point, $len, $nstr, $s) = (0, length($str), '', '');
my($single, $double) = ('', '');
my($in) = (0);
while ($point < $len) {
$s = substr($str, $point, 1);
$point ++;
if ($s eq chr(27)) {
if ($nstr ne '') {
$nstr = $nstr . "\t";
}
if ($in == 0) {
# IN
$in = 1;
if ($single ne '') {
if ($single =~ /[\x00-\x1f\x80-\xff]/) {
$single = &q_encode_string($single);
$nstr = $nstr . '=?' . $iso8 . '?Q?' . $single . "?=\n";
} else {
$nstr = $nstr . $single. "\n";
}
$single = '';
}
} else {
# OUT
$in = 0;
$double = $double . $s . substr($str, $point, 2);
$point = $point + 2;
$s = substr($str, $point, 1);
$point ++;
$double = &b_encode_string($double);
$nstr = $nstr . '=?' . $iso7 . '?B?' . $double . '?=';
$double = '';
while ($s eq ' ') {
$nstr = $nstr . ' ';
$s = substr($str, $point, 1);
$point ++;
}
$nstr = $nstr . "\n";
}
}
if ($in == 1) {
$double = $double . $s;
} else {
$single = $single . $s;
}
}
if ($single ne '') {
if ($nstr ne '') {
$nstr = $nstr . "\t";
}
if ($single =~ /[\x00-\x1f\x80-\xff]/) {
$single = &q_encode_string($single);
$nstr = $nstr . '=?' . $iso8 . '?Q?' . $single . "?=\n";
} else {
$nstr = $nstr . $single. "\n";
}
}
return $nstr;
}
sub mime_decode_string($) {
my $in = shift;
return '' if ($in eq '');
if (!$main::opt_mimedecodequoted) {
if ($in =~ /^([^"]*)("[^"]*")([\0-\255]*)$/) {
return &mime_decode_string($1) . $2 . &mime_decode_string($3);
}
}
$in =~ s/\?=\s+=\?/?==?/g;
$in =~ s/(=\?([^?]+)\?(.)\?([^?]+)\?=)/
($$mime_decode_switch{uc($3)})?mime_decode($2, $3, $4):$1/ge;
return $in;
}
sub mime_decode($$$) {
my($cs, $bq, $str) = @_;
my $ret = &{$$mime_decode_switch{uc($3)}}($4);
if ($cs =~ /iso-8859-([2-9])/i) {
$ret = iso_8859_to_ctext($ret, $1);
} elsif ($cs =~ /koi8-r/i) {
$ret = koi8r_to_ctext($ret);
} elsif ($cs =~ /tis-620/i) {
$ret = tis_620_to_ctext($ret);
} elsif ($cs =~ /cn-gb/i || $cs =~ /gb2312/i) {
$ret = cn_gb_to_ctext($ret);
} elsif ($cs =~ /hz-gb-2312/i) {
$ret = hz_to_ctext($ret);
} elsif ($cs =~ /euc-jp/i) {
$ret = euc_jp_to_ctext($ret);
} elsif ($cs =~ /euc-kr/i) {
$ret = euc_kr_to_ctext($ret);
} elsif ($cs =~ /shift_jis/i) {
$ret = shift_jis_to_ctext($ret);
} elsif ($cs =~ /big5/i || $cs =~ /cn-big5/i) {
$ret = big5_to_ctext($ret);
}
return $ret;
}
sub iso_8859_to_ctext($$) {
my($str, $num) = @_;
my @index = ("A", "A", "B", "C", "D", "L", "G", "F", "H", "M");
$str =~ s/([\x80-\xff]+)/\e-$index[$num]$1\e-A/g;
return $str;
}
@koi_iso =
(" ", " ", " ", " ", " ", " ", " ", " ",
" ", " ", " ", " ", " ", " ", " ", " ",
" ", " ", " ", " ", " ", " ", " ", " ",
" ", " ", " ", " ", " ", " ", " ", " ",
" ", " ", " ", "\xf1", " ", " ", " ", " ",
" ", " ", " ", " ", " ", " ", " ", " ",
" ", " ", " ", "\xa1", " ", " ", " ", " ",
" ", " ", " ", " ", " ", " ", " ", " ",
"\xee", "\xd0", "\xd1", "\xe6", "\xd4", "\xd5", "\xe4", "\xd3",
"\xe5", "\xd8", "\xd9", "\xda", "\xdb", "\\", "\xdd", "\xde",
"\xdf", "\xef", "\xe0", "\xe1", "\xe2", "\xe3", "\xd6", "\xd2",
"\xec", "\xeb", "\xd7", "\xe8", "\xed", "\xe9", "\xe7", "\xea",
"\xce", "\xb0", "\xb1", "\xc6", "\xb4", "\xb5", "\xc4", "\xb3",
"\xc5", "\xb8", "\xb9", "\xba", "\xbb", "\xbc", "\xbd", "\xbe",
"\xbf", "\xcf", "\xc0", "\xc1", "\xc2", "\xc3", "\xb6", "\xb2",
"\xcc", "\xcb", "\xb7", "\xc8", "\xcd", "\xc9", "\xc7", "\xca");
sub koi2iso($) {
my $str = shift;
$str =~ s/(.)/$koi_iso[ord($1)-128]/ge;
return $str;
}
sub koi8r_to_ctext($) {
my $str = shift;
$str =~ s/([\x80-\xff]+)/"\e-L" . koi2iso($1). "\e-A"/ge;
return $str;
}
sub tis_620_to_ctext($) {
my($str) = shift;
$str =~ s/([\x80-\xff]+)/\e-T$1\e-A/g;
return $str;
}
sub cn_gb_to_ctext($) {
my $str = shift;
$str =~ s/([\x80-\xff]+)/"\e\$(A" . remove_msb($1) . "\e(B"/ge;
return $str;
}
sub hz_to_ctext($) {
my $str = shift;
$str =~ s/(~~)/~/g;
$str =~ s/(~{)/\e\$(A/g;
$str =~ s/(~})/\e(B/g;
return $str;
}
sub euc_jp_to_ctext($) {
my $str = shift;
$str =~ s/((\x8f[\xa0-\xff][\xa0-\xff])+)/"\e\$(D"
. remove_msb(remove_ss($1, "\x8f")) . "\e-A"/ge;
$str =~ s/(([\xa0-\xff][\xa0-\xff])+)/"\e\$(B"
. remove_msb($1) . "\e(B"/ge;
$str =~ s/((\x8e[\x80-\xff])+)/"\e)I" . remove_ss($1, "\x8e") . "\e-A"/ge;
return $str;
}
sub euc_kr_to_ctext($) {
my $str = shift;
$str =~ s/([\x80-\xff]+)/"\e\$(C" . remove_msb($1) . "\e(B"/ge;
return $str;
}
sub remove_msb($) {
my $str = shift;
$str =~ tr/\x80-\xff/\x00-\x7f/;
return $str;
}
sub remove_ss($$) {
my($str, $si) = @_;
$str =~ s/$si//g;
return $str;
}
sub shift_jis_to_ctext($) {
my $str = shift;
my $kanji = "[\x81-\x9f\xe0-\xef].";
my $kana = "[\xa0-\xdf]";
$str =~ s/($kanji($kanji|$kana)*$kanji|($kanji)+)/\e\$\(B$1\e\(B/g;
$str =~ s/(($kanji)+)/s2j($1)/ge;
$str =~ s/(($kana)+)/\e\)I$1\e-A/g;
return $str;
}
sub s2j($) {
my $str = shift;
my($c1, $c2);
my $ret = "";
while ($str) {
($c1, $c2, $str) = unpack('CCa*', $str);
if ($c2 >= 0x9f) {
$c1 = $c1 * 2 - ($c1 >= 0xe0 ? 0xe0 : 0x60);
$c2 += 2;
} else {
$c1 = $c1 * 2 - ($c1 >= 0xe0 ? 0xe1 : 0x61);
$c2 += 0x60 + ($c2 < 0x7f);
}
$c1 &= 0x7f;
$c2 &= 0x7f;
$ret .= pack('CC', $c1, $c2);
}
return $ret;
}
sub b157to94($) {
my $str = shift;
my($c1, $c2, $tmp);
my $ret = "";
while ($str) {
($c1, $c2, $str) = unpack('CCa*', $str);
if ($c1 < 0xc9) {
$tmp = ($c1 - 0xa1) * 157 + $c2;
} else {
$tmp = ($c1 - 0xc9) * 157 + $c2;
}
if ($c2 < 0x7f) {
$tmp -= 0x40;
} else {
$tmp -= 0x62;
}
$c1 = $tmp / 94 + 0x21;
$c2 = $tmp % 94 + 0x21;
$ret .= pack('CC', $c1, $c2);
}
return $ret;
}
sub big5_to_ctext($) {
my $str = shift;
$str =~ s/([\xa1-\xc6][\x40-\x7e\xa1-\xfe])/"\e\$(0" .
b157to94($1) . "\e(B"/ge;
$str =~ s/([\xc9-\xf9][\x40-\x7e\xa1-\xfe])/"\e\$(1" .
b157to94($1) . "\e(B"/ge;
return $str;
}
##################################################
##
## B Encoder/Decoder
##
sub b_encode_string($) {
my $mod3 = length($_[0]) % 3;
local($_);
$_ = pack('u', $_[0]);
chop;
s/(^|\n).//mg;
tr[`!-_][A-Za-z0-9+/]; #`
if ($mod3 == 1) { s/..$/==/; }
elsif ($mod3 == 2) { s/.$/=/; }
$_;
}
sub b_decode_string($) {
my $s64 = shift;
my $len;
my $res = '';
local($_);
while ($s64 =~ s/^(.{1,60})//) {
$_ = $1;
$len = length($_) * 3 / 4;
if (/(=+)$/) {
$len -= length($1);
}
tr[A-Za-z0-9+/=][`!-_A]; #`
$res .= sprintf("%c%s\n", $len + 32, $_);
}
unpack('u', $res);
}
##################################################
##
## Q Encoder/Decoder
##
sub q_encode_string($;$) {
my($line, $struct) = @_;
local($_);
$_ = $line;
if (defined($struct) && $struct) {
s/([^\w\d\!\*\+\-\/ ])/sprintf("=%02X", unpack('C', $1))/ge;
} else {
s/([^\!-<>\@-\^\`-\~ ])/sprintf("=%02X", unpack('C', $1))/ge;
}
s/ /_/g;
$_;
}
sub q_decode_string($) {
my($qstr) = @_;
local($_);
$_ = $qstr;
s/_/ /g;
s/(=)([0-9A-Za-z][0-9A-Za-z])/chr(hex('0x'. $2))/ge;
$_;
}
1;
__END__
=head1 NAME
IM::EncDec - MIME header encoder/decoder
=head1 SYNOPSIS
use IM::EncDec;
$mime_header_encoded_string =
mime_encode_string(string, iso7bit, iso8bit);
$string =
mime_decode_string(mime_header_encoded_string);
$B_encoded_string = b_encode_string(string);
$string = b_decode_string(B_encoded_string);
$Q_encoded_string = q_encode_string(string);
$string = q_decode_string(Q_encoded_string);
=head1 DESCRIPTION
The I module is encoder/decoder for MIME header.
This modules is provided by IM (Internet Message).
=head1 EXAMPLES
$_ = "JAPANESE (Kazuhiko Yamamoto)";
mime_encode_string($_, 'iso-2022-jp', 'iso-8859-1');
=> =?iso-2022-jp?B?GyRCOzNLXE9CSScbKEI=?=
(Kazuhiko Yamamoto)
s/\n[\t ]+//g;
print mime_decode_string($_), "\n";
=> "JAPANESE (Kazuhiko Yamamoto)"
=head1 COPYRIGHT
IM (Internet Message) is copyrighted by IM developing team.
You can redistribute it and/or modify it under the modified BSD
license. See the copyright file for more details.
=cut
### Copyright (C) 1997, 1998, 1999 IM developing team
### All rights reserved.
###
### Redistribution and use in source and binary forms, with or without
### modification, are permitted provided that the following conditions
### are met:
###
### 1. Redistributions of source code must retain the above copyright
### notice, this list of conditions and the following disclaimer.
### 2. Redistributions in binary form must reproduce the above copyright
### notice, this list of conditions and the following disclaimer in the
### documentation and/or other materials provided with the distribution.
### 3. Neither the name of the team nor the names of its contributors
### may be used to endorse or promote products derived from this software
### without specific prior written permission.
###
### THIS SOFTWARE IS PROVIDED BY THE TEAM AND CONTRIBUTORS ``AS IS'' AND
### ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
### IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
### PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE TEAM OR CONTRIBUTORS BE
### LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
### CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
### SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
### BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
### WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
### OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
### IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.