[prev] [thread] [next] [lurker] [Date index for 2005/02/25]
Author: simon
Date: 2005-02-25 10:19:14 +0000 (Fri, 25 Feb 2005)
New Revision: 1953

Modified:
   trunk/Email-Store-HTML/lib/Email/Store/HTML.pm
Log:
Add support for inline HTML

Modified: trunk/Email-Store-HTML/lib/Email/Store/HTML.pm
===================================================================
--- trunk/Email-Store-HTML/lib/Email/Store/HTML.pm 2005-02-25 10:18:43 UTC (rev 1952)
+++ trunk/Email-Store-HTML/lib/Email/Store/HTML.pm 2005-02-25 10:19:14 UTC (rev 1953)
@@ -12,6 +12,8 @@
 use HTML::Scrubber;
 use HTML::FormatText::WithLinks;
+use HTML::Parser;
+use Digest::MD5 qw(md5_hex);
 use vars qw($VERSION @allow @rules @default);
 $VERSION = "0.3";
@@ -29,7 +31,14 @@
         footnote => ''
     );
+    my $text = "";
+    my $p = HTML::Parser->new(api_version => 3,
+        text_h => [ sub { $text .= shift }, "dtext" ]
+    );
+
+
+
     # create the scrubber
     my $scrubber = HTML::Scrubber->new(
         allow => \@allow,
@@ -46,20 +55,36 @@
         next unless $_->content_type eq 'text/html';
         my $raw = $_->payload;
         my $scrubbed = $scrubber->scrub($raw);
-        my $text = $f->parse($raw);
+
+        $text = "";
+        $p->parse($raw);
+
         Email::Store::HTML->create( { mail => $mail->id, raw => $raw, scrubbed => $scrubbed, as_text => $text } );
     }
     my $ct = $mail->simple->header('Content-Type') || "";
     if ($ct =~ m!text/html!i) {
-        my $raw = $mail->utf8_body;
-        my $scrubbed = $scrubber->scrub($raw);
-        my $text = $f->parse($raw);
+        my $raw = $mail->utf8_body;
+        my $scrubbed = $scrubber->scrub($raw);
+        my $mime = Email::MIME->new($mail->message);
+        my $charset = $mime->{ct}->{attributes}{charset};
+
+        # extract raw text
+        $text = "";
+        $p->parse($mime->body);
+
         Email::Store::HTML->create( { mail => $mail->id, raw => $raw, scrubbed => $scrubbed, as_text => $text } );
+
+        $mime->body_set($text);
+        $mail->message($mime->as_string);
+        undef $mail->{simple}; # Invalidate cache
+        $mail->update;
+
+
     }
-    return unless ($ct =~ m!text/html!i || $mail->utf8_body =~ /^\s*$/s);
+    return unless ($mail->utf8_body =~ /^\s*$/s);
     my @htmls = $mail->html;
return unless @htmls;
Generated at 11:00 on 25 Feb 2005 by mariachi 0.52