package MIME::Lite::HTML;
# module MIME::Lite::HTML : Provide routine to transform a HTML page in
# a MIME::Lite mail
# Copyright 2000 A.Barbet alian@alianwebserver.com. All rights reserved.
# $Log: HTML.pm,v $
# Revision 1.2 2001/03/20 22:35:56 alian
# - Add lot of pod documentation
# - Change how final mail is build:
# If no images are found when parse routine is used, this modules did'nt
# use a multipart/related part, but a text/html part. Thus, we can reach
# a max. of mail clients (See "clients tested" in documentation).
# - Add size function
#
# Revision 1.1 2001/03/04 22:29:07 alian
# - Correct an error with background image quote
#
# Revision 1.0 2001/03/04 22:13:19 alian
# - Correct major problem with Eudora (See Clients tested in documentation)
# - Build final MIME-Lite object with knowledge of RFC-2257
# - Add some POD documentation and references
#
# Revision 0.9 2001/02/02 01:15:35 alian
# Correct some other things with error handling (suggested by Steve Harvey
#
# Revision 0.8 2001/01/21 00:58:48 alian
# Correct error function
#
# Revision 0.7 2000/12/30 20:22:27 alian
# - Allow to send a string of text to the parse function, instead of an url
# - Add feature to put data on the fly when image are available only on memory
# - Put comments on print when buffer find url
# Ideas suggested by mtveerman@mindless.com
#
# Revision 0.6 2000/12/13 11:02:58 alian
# - Allow sup parameter for MIME-Lite in constructor
# - Add parameter for parse url to include a text file when HTML
# is not supported by client.
#
# Revision 0.5 2000/11/13 21:36:58 Administrateur
# - Arg, forgot cariage return in fill_template :-(
#
# Revision 0.4 2000/11/12 18:52:56 Administrateur
# - Add feature of replace word in gabarit (for newsletter by example)
# - Include body background
#
# Revision 0.3 2000/10/26 22:55:46 Administrateur
# Add parsing for form (action and input image)
#
# Revision 0.2 2000/10/26 20:08:06 Administrateur
# Update remplacement of relative url
use LWP::UserAgent;
use HTML::LinkExtor;
use URI::URL;
use MIME::Lite;
use strict;
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
require Exporter;
@ISA = qw(Exporter);
@EXPORT = qw();
$VERSION = ('$Revision: 1.2 $ ' =~ /(\d+\.\d+)/)[0];
=head1 NAME
MIME::Lite::HTML - Provide routine to transform a HTML page in a MIME-Lite mail
=head1 SYNOPSIS
use MIME::Lite;
use MIME::Lite::HTML;
my $mailHTML = new MIME::Lite::HTML
From => 'MIME-Lite@alianwebserver.com',
To => 'alian@jupiter',
Subject => 'Mail in HTML with images';
$MIMEmail = $mailHTML->parse('http://www.alianwebserver.com');
$MIMEmail->send; # or for win user : $mail->send_by_smtp('smtp.fai.com');
=head1 VERSION
$Revision: 1.2 $
=head1 DESCRIPTION
This module provide routine to transform a HTML page in MIME::Lite mail.
So you need this module to use MIME-Lite-HTML possibilities
=head2 What's happen ?
The job done is:
=over
=item *
Get the file (LWP) if needed
=item *
Parse page to find include images
=item *
Attach them to mail with adequat cid
=item *
Include external CSS,Javascript file
=item *
Replace relative url with absolute one
=item *
Build the final MIME-Lite object with each part found
=back
=head2 Usage
It can be used by example in a HTML newsletter. You make a classic HTML page,
and give just url to MIME::Lite::HTML.
=head2 Construction
MIME-Lite-HTML use a MIME-Lite object, and RFC2257 construction:
If images are present, construction use is:
--> multipart/alternative
------> text/plain if present
------> multipart/related
-------------> text/html
-------------> each images
If no images is present, this is that:
---> multipart/alternative
-------> text/plain if present
-------> text/html
=head2 Documentation
Additionnal documentation can be found here:
=over
=item *
MIME-lite module
=item *
RFC 822, RFC 1521, RFC 1522 and specially RFC 2257 (MIME Encapsulation
of Aggregate Documents, such as HTML)
=back
=head2 Clients tested
HTML in mail is not full supported so this module can't work with all email clients.
If some client recognize HTML, they didn't support images include in HTML.
So in fact, they recognize multipart/relative but not multipart/related.
=over
=item Netscape Messager (Linux-Windows)
100% ok
=item Outlook Express (Windows)
100% ok
=item Eudora (Windows)
If this module just send HTML and text, (without images), 100% ok.
With images, Eudora didn't recognize multipart/related part as describe in RFC 2257,
even if he can read his own HTML mail. So if images are present in HTML part,
text and HTML part will be displayed both, text part in first. Two additional
headers will be displayed in HTML part too in this case. Version 1.0 of this
module correct major problem of headers displayed with image include in
HTML part.
=item KMail (Linux)
If this module just send HTML and text, (without images), 100% ok.
In other case, Kmail didn't support image include in HTML. So if you set in KMail
"Prefer HTML to text", it display HTML with images broken. Otherwise, it display
text part.
=item Pegasus (Windows)
If this module just send HTML and text, (without images), 100% ok.
Pegasus didn't support images in HTML. When it find a multipart/related message,
it ignore it, and display text part.
=back
If you find others mail client who support (or not support) MIME-Lite-HTML
module, give me some feedback ! If you want be sure that your mail can be read by
maximum of people, (so not only OE and Netscape), don't include images in your mail,
and use a text buffer too. If multipart/related mail is not recognize, multipart/alternative
can be read by the most of mail client.
=head1 Public Interface
=over
=item new(%hash)
Create a new instance of MIME::Lite::HTML.
%hash can have this key : [Proxy], [Debug], [HashTemplate]
Others keys are use with MIME::Lite constructor.
This MIME-Lite keys are: Bcc, Encrypted, Received, Sender, Cc, From,
References, Subject, Comments, Keywords, Reply-To To, Content-*,
Message-ID,Resent-*, X-*,Date,MIME-Version,Return-Path,
Organization
$hash{'HashTemplate'} is a reference to a hash. If present, MIME::Lite::HTML will
substitute $name ?> with $hash{'HashTemplate'}{'name'} when parse url to
send. $hash{'HashTemplate'} can be used too for include data for subelement. Ex:
$hash{'HashTemplate'}{'http://www.alianwebserver.com/images/sommaire.gif'}=\@data;
or $hash{'HashTemplate'}{'http://www.alianwebserver.com/script.js'}="alert("Hello world");";
When module find the image http://www.alianwebserver.com/images/sommaire.gif in buffer,
it don't get image with LWP but use data found in $hash{'HashTemplate'}.
=cut
sub new
{
my $class = shift;
my $self = {};
bless $self, $class;
my %param = @_;
# Agent name
$self->{_AGENT} = new LWP::UserAgent "MIME-Lite-HTML $VERSION", 'alian@alianwebserver.com';
# Set debug level
if ($param{'Debug'})
{
$self->{_DEBUG} = 1;
delete $param{'Debug'};
}
# Set proxy to use to get file
if ($param{'Proxy'})
{
$self->{_AGENT}->proxy('http',$param{'Proxy'}) ;
print "Set proxy for http : ", $param{'Proxy'},"\n" if ($self->{_DEBUG});
delete $param{'Proxy'};
}
# Set hash to use with template
if ($param{'HashTemplate'})
{
$param{'HashTemplate'} = ref($param{'HashTemplate'}) eq "HASH" ? $param{'HashTemplate'} : %{$param{'HashTemplate'}}; #CHANGE
$self->{_HASH_TEMPLATE}= $param{'HashTemplate'};
delete $param{'HashTemplate'};
}
# Create multipart/alternative part
$param{'Type'}='multipart/alternative';
my $mail = new MIME::Lite (%param);
# Ok I hope I known what I do ;-)
MIME::Lite->quiet(1);
$mail->replace('X-Mailer',"MIME::Lite::HTML $VERSION");
$self->{_MAIL} = $mail;
return $self;
}
=item parse($html, [$url_txt], [$url_base])
Subroutine used for created HTML mail with MIME-Lite
Parameters:
=over
=item $html
Url of HTML file to send, can be a local file. If $url is not an
url (http or https or ftp or file or nntp), $url is used as a buffer.
Example : http://www.alianwebserver.com, file://c|/tmp/index.html
or ''.
=item $url_txt
Url of text part to send for person who doesn't support HTML mail.
As $html, $url_txt can be a simple buffer.
=item $url_base
$url_base is used if $html is a buffer, for get element found in HTML buffer.
=back
Return the MIME::Lite part to send
=cut
sub parse
{
my($self,$url_page,$url_txt,$url1)=@_;
my ($type,@mail,$gabarit,$gabarit_txt,$racinePage);
if ($url_page=~/^(https?|ftp|file|nntp):\/\//)
{
# Get content of $url_page with LWP
print "Get ", $url_page,"\n" if $self->{_DEBUG};
my $req = new HTTP::Request('GET' => $url_page);
my $res = $self->{_AGENT}->request($req);
if (!$res->is_success) {$self->set_err("$url_page n'est pas accessible");}
else {$gabarit = $res->content;}
$racinePage=$res->base;
}
else {$gabarit=$url_page;$racinePage=$url1;}
# Get content of $url_txt with LWP if needed
if ($url_txt)
{
if ($url_txt=~/^(https?|ftp|file|nntp):\/\//)
{
print "Get ", $url_txt,"\n" if $self->{_DEBUG};
my $req2 = new HTTP::Request('GET' => $url_txt);
my $res3 = $self->{_AGENT}->request($req2);
if (!$res3->is_success) {$self->set_err("$url_txt n'est pas accessible");}
else {$gabarit_txt = $res3->content;}
}
else {$gabarit_txt=$url_txt;}
}
# Get all images and create part for each of them
my $analyseur = HTML::LinkExtor->new;
$analyseur->parse($gabarit);
my @l = $analyseur->links;
# Include external CSS files
$gabarit = $self->include_css($gabarit,$racinePage);
# Include external Javascript files
$gabarit = $self->include_javascript($gabarit,$racinePage);
($gabarit,@mail) = $self->input_image($gabarit,$racinePage);
$gabarit = $self->link_form($gabarit,$racinePage);
my (%images_read,%url_remplace);
# Scan each part found by linkExtor
foreach my $url (@l)
{#print @$url,"\n";
my $urlAbs = URI::WithBase->new($$url[2],$racinePage)->abs;
# Replace relative href found to absolute one
if ( ($$url[0] eq 'a')
&& ($$url[1] eq 'href')
&& ($$url[2]!~m!^http://!)
&& (!$url_remplace{$urlAbs}) )
{
$gabarit=~s/href="?'?$$url[2]("?|'?)/href="$urlAbs"/gim;
print "Replace ", $$url[2]," with ",$urlAbs,"\n" if $self->{_DEBUG};
$url_remplace{$urlAbs}=1;
}
# Replace images background
elsif ($$url[1] eq 'background')
{
my $v ="background=\"cid:$urlAbs\"";
$gabarit=~s/background=\"$$url[2]\"/$v/im;
print "Get ", $urlAbs,"\n" if $self->{_DEBUG};
my $res2 = $self->{_AGENT}->request
(new HTTP::Request('GET' => $urlAbs));
# Create MIME type
if (lc($urlAbs)=~/gif$/) {$type = "image/gif";}
else {$type = "image/jpg";}
# Create part
my $mail = new MIME::Lite
Data => $res2->content,
Encoding =>'base64';
$mail->attr("Content-type"=>$type);
$mail->attr('Content-ID'=>$urlAbs);
push(@mail,$mail);
}
# Get only new
next if ((lc($$url[0]) ne 'img') && (lc($$url[0]) ne 'src') || ($images_read{$urlAbs}) );
# Create MIME type
if (lc($urlAbs)=~/gif$/) {$type = "image/gif";}
else {$type = "image/jpg";}
my $buff1;
if ($self->{_HASH_TEMPLATE}{$urlAbs})
{
print "Using buffer on: ", $urlAbs,"\n" if $self->{_DEBUG};
$buff1 = ref($self->{_HASH_TEMPLATE}{$urlAbs}) eq "ARRAY" ? join "", @{$self->{_HASH_TEMPLATE}{$urlAbs}} : $self->{_HASH_TEMPLATE}{$urlAbs};
delete $self->{_HASH_TEMPLATE}{$urlAbs};
}
else
{
# Get image
print "Get ", $urlAbs,"\n" if $self->{_DEBUG};
my $res2 = $self->{_AGENT}->request(new HTTP::Request('GET' => $urlAbs));
if (!$res2->is_success) {$self->set_err("Can't get $urlAbs\n");}
$buff1=$res2->content;
}
# Create part
$images_read{$urlAbs}=1;
my $mail = new MIME::Lite
Data => $buff1,
Encoding =>'base64';
$mail->replace("X-Mailer" => "");
my $t = '<'.$urlAbs.'>';
$mail->attr("Content-type"=>$type);
$mail->attr('Content-ID' =>$t);
push(@mail,$mail);
}
# Replace in HTML link with image with cid:key
sub pattern_image {return 'new($_[1],$_[2])->abs.'"';}
$gabarit=~s/]*)src=(["']?)([^"'> ]*)(["']?)/pattern_image($1,$3,$racinePage)/ieg;
# Substitue value in template if needed
if (scalar keys %{$self->{_HASH_TEMPLATE}}!=0)
{$gabarit=$self->fill_template($gabarit,$self->{_HASH_TEMPLATE});}
# Create MIME-Lite object
$self->build_mime_object($gabarit,$gabarit_txt,@mail);
return $self->{_MAIL};
}
=item size()
Display size of mail in characters (so octets) that will be send.
(So use it *after* parse method). Use this method for control
size of mail send, I personnaly hate receive 500k by mail.
I pay for a 33k modem :-(
=cut
sub size
{
my ($self)=shift;
return length($self->{_MAIL}->as_string);
}
=back
=head1 Private methods
=over
=item build_mime_object($html,[$txt],[@mail])
(private)
Build the final MIME-Lite object to send with each part read before
=over
=item $html
Buffer of HTML part
=item $txt
Buffer of text part
=item @mail
List of images attached to HTML part. Each item is a MIME-Lite object.
=back
See "Construction" in "Description" for know how MIME-Lite object is build.
=cut
sub build_mime_object
{
my ($self,$html,$txt,@mail)=@_;
# Create part for HTML
my $part = new MIME::Lite
'Type' =>'TEXT',
'Encoding'=>'quoted-printable',
'Data' =>$html;
$part->attr("content-type"=> "text/html; charset=iso-8859-1");
my $email2;
# Create related part if images
if (@mail)
{
$email2 = new MIME::Lite ('Type'=>'multipart/related');
# Remove some header for Eudora client in HTML and related part
$part->replace("MIME-Version" => "");
$part->replace('X-Mailer',"MIME::Lite::HTML $VERSION");
$email2->replace("Content-transfer-encoding" => "");
$email2->replace("MIME-Version" => "");
$email2->replace("X-Mailer" => "");
# Attach HTML part to related part
$email2->attach($part);
# Attach each image to related part
foreach (@mail) {$email2->attach($_);} # Attach list of part
}
if ($txt)
{
# Create part for text if needed
my $part2 = new MIME::Lite 'Type' => 'TEXT', 'Encoding' => '7bit', 'Data' => $txt;
$part2->attr("content-type"=> "text/plain; charset=us-ascii");
# Remove some header for Eudora client
$part2->replace("MIME-Version" => "");
$part2->replace("X-Mailer" => "");
# Attach text part to relative part
$self->{_MAIL}->attach($part2);
}
# Attach related part to alternative part
if (@mail) {$self->{_MAIL}->attach($email2);}
# Attach HTML part to alternative part
else {$self->{_MAIL}->attach($part);}
}
=item include_css($gabarit,$root)
(private)
Search in HTML buffer ($gabarit) to remplace call to extern CSS file
with his content. $root is original absolute url where css file will
be found.
=cut
sub include_css
{
my ($self,$gabarit,$root)=@_;
sub pattern_css
{
my ($self,$url,$milieu,$fin,$root)=@_;
my $ur = URI::URL->new($url, $root)->abs;
print "Include CSS file $ur\n" if $self->{_DEBUG};
my $res2 = $self->{_AGENT}->request(new HTTP::Request('GET' => $ur));
print "Ok file downloaded\n" if $self->{_DEBUG};
return '\n";
}
$gabarit=~s/]*?)href="?([^" ]*css)"?([^>]*)>/$self->pattern_css($2,$1,$3,$root)/iegm;
print "Done CSS\n" if $self->{_DEBUG};
return $gabarit;
}
=item include_javascript($gabarit,$root)
(private)
Search in HTML buffer ($gabarit) to remplace call to extern javascript file
with his content. $root is original absolute url where javascript file will
be found.
=cut
sub include_javascript
{
my ($self,$gabarit,$root)=@_;
sub pattern_js
{
my ($self,$url,$milieu,$fin,$root)=@_;
my $ur = URI::URL->new($url, $root)->abs;
print "Include Javascript file $ur\n" if $self->{_DEBUG};
my $res2 = $self->{_AGENT}->request(new HTTP::Request('GET' => $ur));
my $content = $res2->content;
print "Ok file downloaded\n" if $self->{_DEBUG};
return "\n"."\n".
'\n";
}
$gabarit=~s/