# -*-Perl-*-
################################################################
###
### Iso2022jp.pm
###
### Author: Internet Message Group
### Created: Apr 23, 1997
### Revised: Apr 23, 2007
###
my $PM_VERSION = "IM::Iso2022jp.pm version 20161010(IM153)";
package IM::Iso2022jp;
require 5.003;
require Exporter;
use IM::Util;
use IM::EncDec qw(b_encode_string q_encode_string);
use IM::Japanese qw(code_check conv_iso2022jp);
use integer;
use strict;
use vars qw(@ISA @EXPORT);
@ISA = qw(Exporter);
@EXPORT = qw(set_debug_encode
struct_iso2022jp_mimefy
line_iso2022jp_mimefy
header_iso2022jp_conv
);
use vars qw($Jp_Bin $Jp_Qin $Jp_out
$Jis_kanji $Jis_roman
$C_pascii);
($Jp_Bin, $Jp_Qin, $Jp_out) = ('=?ISO-2022-JP?B?', '=?ISO-2022-JP?Q?', '?=');
($Jis_kanji, $Jis_roman) = ('\e\$[\@B]', '\e\([BJ]');
$C_pascii = '[\x21-\x7e]+';
BEGIN {
$main::Folding_length = 72 unless (defined($main::Folding_length));
}
##### STRUCTURED HEADER LINE ISO-2022-JP MIME CONVERSION #####
#
# struct_iso2022jp_mimefy(lines)
# lines: continuous header lines to be converted
# return value: (lines, err)
# lines: converted lines (NULL if error)
#
sub struct_iso2022jp_mimefy($) {
my $line_in = shift;
my($c, $groupsyntax);
my($inquote, $incomment, $addrquote) = (0, 0, 0);
my($groupcolon, $need_space, $need_encode) = (0, 0, 0);
my($line_out, $line_work) = ('', '');
my($n);
im_debug("encoding structured: $line_in\n") if (&debug('encode'));
while ($line_in ne '') {
if ($line_in =~ /^($Jis_kanji[^\e]+$Jis_roman([ \t]*$Jis_kanji[^\e]+$Jis_roman)*)(.*)/os) {
$c = $1;
$line_in = $3;
$need_encode = 1;
} elsif ($line_in =~ /^($Jis_roman)(.*)/os) { # XXXX
$c = $1;
$line_in = $2;
$need_encode = 1;
} else {
($c, $line_in) = unpack('a a*', $line_in);
}
if (!$inquote && $c =~ /^\s$/) {
# split/encode
if ($line_work ne '' && $need_encode) {
$need_encode = 0;
$line_out =~ /([^\n]*)$/;
$n = length($1);
$line_out .= &word_iso2022jp_mimefy($n, $line_work,
$need_space, 1).$c;
} else {
$line_out = &hdr_cat($line_out, $line_work.$c, '');
}
$line_work = '';
# $need_space = 0;
next;
} elsif ($inquote) {
$line_work .= $c;
if ($c eq '"') {
$inquote = 0;
} elsif ($c eq '\\') {
my $tmp;
($tmp, $line_in) = unpack('a a*', $line_in);
$line_work .= $tmp;
}
next;
} elsif ($incomment) {
if ($c eq '(') {
$incomment++;
} elsif ($c eq ')') {
$incomment--;
if ($incomment == 0) {
# encode
if ($line_work ne '' && $need_encode) {
$need_encode = 0;
$line_out =~ /([^\n]*)$/;
$n = length($1);
$line_out .= &word_iso2022jp_mimefy($n, $line_work,
$need_space, 1).$c;
} else {
$line_out = &hdr_cat($line_out, $line_work.$c, '');
}
$line_work = '';
$need_space = 1;
next;
}
} elsif ($c eq '\\') {
$line_work .= $c;
($c, $line_in) = unpack('a a*', $line_in);
}
$line_work .= $c;
next;
} elsif ($c eq '"') {
$inquote = 1;
} elsif ($c eq '(') { # beggining of a comment
$incomment++;
# encode and split
if ($line_work ne '' && $need_encode) {
$need_encode = 0;
$line_out =~ /([^\n]*)$/;
$n = length($1);
$line_out .= &word_iso2022jp_mimefy($n, $line_work, 0, 1).$c;
} else {
$line_out = &hdr_cat($line_out, $line_work.$c, '');
}
$line_work = '';
$need_space = 0;
next;
} elsif ($c eq ')') {
im_err("Unbalanced comment parenthesis ('(', ')'): "
."$line_out$line_work -- $c -- $line_in\n");
# &error_exit;
return '';
} elsif ($c eq '<') {
# encode
$addrquote++;
if ($addrquote == 1) {
if ($line_work ne '' && $need_encode) {
$need_encode = 0;
$line_out =~ /([^\n]*)$/;
$n = length($1);
$line_work = &word_iso2022jp_mimefy($n, $line_work,
$need_space, 1).' ';
$line_out .= $line_work;
} else {
$line_out = &hdr_cat($line_out, $line_work, '');
}
$line_work = $c;
$need_space = 1;
next;
}
} elsif ($c eq '>') {
$addrquote--;
if ($addrquote == 0) {
# split
$line_out = &hdr_cat($line_out, $line_work.$c, '');
$line_work = '';
$need_space = 1;
next;
}
} elsif ($c eq '\\') {
$line_work .= $c;
($c, $line_in) = unpack('a a*', $line_in);
} elsif ($c eq ':') {
$line_work .= $c;
($c, $line_in) = unpack('a a*', $line_in);
$groupcolon = 1 if ($c ne ':');
} elsif ($c eq ';') {
if ($groupcolon) {
$groupcolon = 0;
$groupsyntax = 1;
}
} elsif ($c eq ',') {
unless ($groupcolon) {
# trail
if ($line_work ne '' && $need_encode) {
$need_encode = 0;
$line_out =~ /([^\n]*)$/;
$n = length($1);
$line_out .= &word_iso2022jp_mimefy($n, $line_work,
$need_space, 1).' '.$c;
} else {
$line_out = &hdr_cat($line_out, $line_work.$c, '');
}
$line_work = '';
$need_space = 1;
next;
}
}
$line_work .= $c;
}
# trail
if ($line_work ne '' && $need_encode) {
$need_encode = 0;
$line_out =~ /([^\n]*)$/;
$n = length($1);
$line_out .= &word_iso2022jp_mimefy($n, $line_work, $need_space, 1);
} else {
$line_out = &hdr_cat($line_out, $line_work, '');
}
im_debug("encoded structured: $line_out\n") if (&debug('encode'));
if ($addrquote) {
im_err("Unbalanced address quotes ('<', '>'): $line_out\n");
# &error_exit;
return '';
}
if ($inquote) {
im_err("Unbalanced quotes ('\"'): $line_out\n");
# &error_exit;
return '';
}
if ($incomment) {
im_err("Unbalanced comment parenthesis ('(', ')'): $line_out\n");
# &error_exit;
return '';
}
if ($line_out =~ /$Jis_kanji[^\e]+$Jis_roman/o) {
im_err("invalid iso-2022-jp charset location in structured field: "
. "$line_out\n");
# &error_exit;
return '';
}
return $line_out;
}
##### UNSTRUCTURED HEADER LINE ISO-2022-JP MIME CONVERSION #####
#
# line_iso2022jp_mimefy(lines)
# lines: continuous header lines to be converted
# return value: converted lines
#
sub line_iso2022jp_mimefy($) {
my($line_in) = @_;
my($line_out, $this_word, $this_space, $this_code, $follow, $n);
$follow = 0;
$this_space = '';
$line_out = '';
im_debug("encoding unstructured: $line_in\n") if (&debug('encode'));
while ($line_in ne '') {
if ($line_in =~ /^\n([ \t]*)(.*)/s) { # fold headdings
$line_in = $2;
if ($this_space ne '') {
$line_out .= $this_space;
$this_space = '';
}
if ($1 ne '') {
$line_out .= "\n$1";
} else {
$line_out .= "\n";
}
$follow = 0;
next;
}
$this_space = '';
if ($line_in =~ /^([ \t]+)(.*)/s) { # just spaces
$line_in = $2;
$this_space = $1;
}
$this_word = '';
$this_code = 'us-ascii';
while ($line_in ne '') {
if ($line_in =~ /^($C_pascii)(.*)/os) {
$line_in = $2;
$this_word .= $1;
} elsif ($line_in =~ /^($Jis_kanji[^\e]+$Jis_roman([ \t]*$Jis_kanji[^\e]+$Jis_roman)*)(.*)/os) {
last
if ($this_code ne 'us-ascii' && $this_code ne 'iso-2022-jp');
$line_in = $3;
$this_word .= $1;
$this_code = 'iso-2022-jp';
} elsif ($line_in =~ /^($Jis_roman)(.*)/os) { # XXX
last
if ($this_code ne 'us-ascii' && $this_code ne 'iso-2022-jp');
$line_in = $2;
$this_word .= $1;
$this_code = 'iso-2022-jp';
} elsif ($line_in =~ /^[ \t]+/) { # just spaces
last;
} elsif ($line_in =~ /^\n[ \t]*/) { # fold headdings
last;
} else {
# anything else (XXX should be Q-encoded?)
last if ($this_code ne 'us-ascii' && $this_code ne 'unknown');
(my $tmp, $line_in) = unpack('a a*', $line_in);
$this_word .= $tmp;
$this_code = 'unknown';
}
}
if ($this_code eq 'us-ascii' || $this_code eq 'unknown') {
$line_out = &hdr_cat($line_out, $this_word, $this_space);
$this_space = '';
$follow = 0;
} elsif ($this_code eq 'iso-2022-jp') {
# ISO-2022-JP encoding
im_debug("encoding: $this_word\n") if (&debug('encode'));
if ($this_space ne '') {
if ($follow) {
$this_word = $this_space . $this_word;
} else {
$line_out .= $this_space;
}
}
$line_out =~ /([^\n]*)$/;
$n = length($1);
$line_out .= &word_iso2022jp_mimefy($n, $this_word, $follow, 0);
$this_space = '';
$follow = 1;
}
}
return $line_out;
}
##### WORD ISO-2022-JP MIME CONVERSION #####
#
# word_iso2022jp_mimefy(size, word, need_pre_space, struct)
# size: length already occupied in the last line
# word: word to be converted
# need_pre_space: space should be prepended
# struct: true if in structured field
# return value: encoded words
#
sub word_iso2022jp_mimefy($$$$) {
my($size, $word_in, $need_pre_space, $struct) = @_;
my($word_out) = '';
my($word_conv, $n, $word_sub, $word_rest);
if ($main::NoFolding) {
if ($main::HdrQEncoding) {
$word_out .= $Jp_Qin;
$word_out .= &q_encode_string($word_in, $struct);
} else {
$word_out .= $Jp_Bin;
$word_out .= &b_encode_string($word_in);
}
$word_out .= $Jp_out;
return $word_out;
}
$size = $main::Folding_length - $size;
im_debug("encoding word($size): $word_in\n") if (&debug('encode'));
if ($size - length($Jp_Bin) - length($Jp_out) - 12 <= 0) {
$word_out .= "\n\t";
$size = $main::Folding_length;
} elsif ($need_pre_space) {
$word_out .= ' ';
}
while ($word_in ne '') {
$word_conv = '';
$word_out =~ /([^\n]*)$/;
$n = int(($size - (length($1) + length($Jp_Bin)
+ length($Jp_out) + 12))/4*3);
while (($n > 0) && $word_in ne '') {
# if ($word_in !~ /$Jis_kanji/o) {
# # us-ascii part
# ($word_sub, $word_in) = unpack("a$n a*", $word_in);
# $word_conv .= $word_sub;
# $n -= length($word_sub);
# next;
# }
if ($word_in =~ /^([^\e]+)(.*)/s) {
# us-ascii part
($word_sub, $word_in) = unpack("a$n a*", $1);
$word_in .= $2;
$word_conv .= $word_sub;
$n -= length($word_sub);
next;
} elsif ($word_in =~ /^($Jis_roman)([^\e]+)(.*)/s) {
# JIS roman part
if ($n < 3) {
$n = 0;
next;
}
($word_sub, $word_in) = unpack("a$n a*", $2); # work_in?
$word_sub = $1 . $word_sub;
$word_in .= $3;
$word_conv .= $word_sub;
$n -= length($word_sub);
next;
} elsif ($word_in =~ /($Jis_kanji)([^\e]+)($Jis_roman)(.*)/os) {
# iso-2022-jp part
$n = int($n/2)*2 - 6;
if ($n < 2) {
$n = 0;
next;
}
($word_sub, $word_rest) = unpack("a$n a*", $2);
if ($word_rest) {
$word_in = "$1$word_rest$3$4";
} else {
$word_in = $4;
}
$word_conv .= "$1$word_sub$3";
$n -= length($word_sub)+6;
next;
} else {
# Unsupported charset (XXX)
$word_conv .= $word_in;
$word_in = '';
}
}
if ($word_conv ne '') {
if ($main::HdrQEncoding) {
$word_out .= $Jp_Qin;
$word_out .= &q_encode_string($word_conv, $struct);
} else {
$word_out .= $Jp_Bin;
$word_out .= &b_encode_string($word_conv);
}
$word_out .= $Jp_out;
}
if ($word_in ne '') {
$word_out .= "\n\t";
}
$size = $main::Folding_length;
}
return $word_out;
}
##### HEADER ISO-2022-JP CONVERSION #####
#
# header_iso2022jp_conv(Header)
# return value: status
# 0: success
# -1: failure
#
sub header_iso2022jp_conv($$) {
my($header, $code_conv) = @_;
my($i, $c);
my($field_name, $field_value);
for ($i = 0; $i <= $#$header; $i++) {
im_debug("Iso2022jp: converting: $$header[$i]\n") if (&debug('encode'));
$c = &code_check($$header[$i]);
if ($code_conv) {
if ($c eq 'sORe') {
if ($main::Body_code ne '') {
$c = lc($main::Body_code);
} else {
$c = lc($main::Default_code);
}
}
im_debug("Iso2022jp: code conversion from $c\n")
if (&debug('encode'));
if ($c eq 'sjis' || $c eq 'euc') {
$$header[$i] = &conv_iso2022jp($$header[$i], uc($c));
}
$c = 'jis';
}
if ($c eq 'jis') {
if ($$header[$i] =~ /^([\w-]+:\s*)(\S.*)/s) {
$field_name = $1;
$field_value = $2;
if ($field_name =~ /^Apparently-To:/i
|| $field_name =~ /^(Resent-)?(To|Cc|Bcc|Dcc|From|Sender|Reply-To):/i
|| $field_name =~ /^Originator:/i
|| $field_name =~ /^Errors-To:/i
|| $field_name =~ /^Return-Receipt-To:/i) {
# structured field
my $l = &struct_iso2022jp_mimefy($field_value);
return -1 if ($l eq '');
$$header[$i] = "$field_name$l";
} else {
$$header[$i] = $field_name.&line_iso2022jp_mimefy($field_value);
}
}
}
im_debug("Iso2022jp: converted: $$header[$i]\n")
if (&debug('encode'));
}
return 0;
}
##### HEADER CONCATINATION #####
#
# hdr_cat(str1, str2, space)
# str1: a preceeding header string
# str2: a header string to be appended to str1
# space: separatig space
# return value: a concatinated header string
#
sub hdr_cat($$$) {
my($str1, $str2, $space) = @_;
if ($str1 eq '' || $str1 =~ /\n[\t ]+$/) {
return "$str1$space$str2";
}
$str1 =~ /([^\n]*)$/;
my $l1 = length($1);
$str2 =~ /^([^\n]*)/;
my $l2 = length($1);
if (!$main::NoFolding
&& ($l1 + length($space) + $l2 + 1 > $main::Folding_length)) {
$space = "\t" if ($space eq '');
return "$str1\n$space$str2";
}
return "$str1$space$str2";
}
1;
__END__
=head1 NAME
IM::Iso2022jp - MIME header encoder for ISO-2022-JP
=head1 SYNOPSIS
use IM::Iso2022jp;
$encoded_string_for_structured_header = struct_iso2022jp_mimefy(string);
$encoded_string_for_unstructured_header = line_iso2022jp_mimefy(string);
$rcode = header_iso2022jp_conv(\@Header, code_conv_flag);
=head1 DESCRIPTION
The I module is MIME header encoder for ISO-2022-JP.
This modules is provided by IM (Internet Message).
=head1 COPYRIGHT
IM (Internet Message) is copyrighted by IM developing team.
You can redistribute it and/or modify it under the modified BSD
license. See the copyright file for more details.
=cut
### Copyright (C) 1997, 1998, 1999 IM developing team
### All rights reserved.
###
### Redistribution and use in source and binary forms, with or without
### modification, are permitted provided that the following conditions
### are met:
###
### 1. Redistributions of source code must retain the above copyright
### notice, this list of conditions and the following disclaimer.
### 2. Redistributions in binary form must reproduce the above copyright
### notice, this list of conditions and the following disclaimer in the
### documentation and/or other materials provided with the distribution.
### 3. Neither the name of the team nor the names of its contributors
### may be used to endorse or promote products derived from this software
### without specific prior written permission.
###
### THIS SOFTWARE IS PROVIDED BY THE TEAM AND CONTRIBUTORS ``AS IS'' AND
### ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
### IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
### PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE TEAM OR CONTRIBUTORS BE
### LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
### CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
### SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
### BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
### WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
### OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
### IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.