[prev] [thread] [next] [lurker] [Date index for 2005/02/08]
Author: simon Date: 2005-02-08 15:49:43 +0000 (Tue, 08 Feb 2005) New Revision: 1856 Modified: trunk/Email-Store-HTML/lib/Email/Store/HTML.pm trunk/Email-Store-HTML/t/01basic.t Log: Update a test, make html return all html parts and update docs Modified: trunk/Email-Store-HTML/lib/Email/Store/HTML.pm =================================================================== --- trunk/Email-Store-HTML/lib/Email/Store/HTML.pm 2005-02-08 15:49:17 UTC (rev 1855) +++ trunk/Email-Store-HTML/lib/Email/Store/HTML.pm 2005-02-08 15:49:43 UTC (rev 1856) @@ -16,6 +16,7 @@ $VERSION = "0.1"; + sub on_store_order { 2 } sub on_store { @@ -47,10 +48,86 @@ my $scrubbed = $scrubber->scrub($raw); my $text = $f->parse($raw); Email::Store::HTML->create( { mail => $mail->id, raw => $raw, scrubbed => $scrubbed, as_text => $text } ); - last; } } +=head1 NAME + +Email::Store::HTML - various HTML related functions for Email::Store::Mail + +=head1 SYNOPSIS + + my $mail = Email::Store::Mail->retrieve( $msgid ); + exit unless $mail->html; + + for ($mail->html) { + print $_->raw; # prints out the raw HTML version of the attachment + print $_->scrubbed; # prints out a scrubbed version of the mail which should be safe + print $_->as_text; # prints out a version of the HTML converted to plain text + } + +=head1 DESCRIPTION + +=head1 METHODS + +=head2 on_store <Email::Store::Mail> + +This finds every HTML attachment in the mail and performs various operations on them +before storing them as a new C<Email::Store::HTML> object. + +=head2 raw + +The raw HTML, exactly as we found it. + +=head2 scrubbed + +A scrubbed version of the HTML with things like javascript removed. + +=head2 as_text + +The HTML run through C<HTML::FormatText::WithLinks>. Links are placed after the anchor +word(a) in square brackets so that + + <a href="http://thegestalt.org">HOME!</a> + +becomes + + HOME! [ http://thegestalt.org ] + + +=head1 BUGS AND TODO + +No bugs known at the moment. 
+ +It might be nice to give people access to the scrubber and formatter so that they +could change the options. + +=head1 SUPPORT + +This module is part of the Perl Email Project - http://pep.kwiki.org/ + +There is a mailing list at pep@xxxx.xxx (subscribe at pep-subscribe@xxxx.xxx) +and an archive available at http://nntp.perl.org/group/pep.php + +=head1 AUTHOR + +Simon Wistow <simon@xxxxxxxxxx.xxx> + +=head1 COPYRIGHT + +Copyright 2005, Simon Wistow + +This code is distributed under the same terms as Perl itself. + + +=head1 SEE ALSO + +L<HTML::Scrubber>, L<HTML::FormatText::WithLinks> + +=cut + + + ### # Configuration for HTML::Scrubber ### @@ -102,8 +179,7 @@ package Email::Store::Mail; sub html { my ($self) = @_; - my ($html) = Email::Store::HTML->from_mail($self->message_id); - return $html; + return Email::Store::HTML->from_mail($self->message_id); } package Email::Store::HTML; Modified: trunk/Email-Store-HTML/t/01basic.t =================================================================== --- trunk/Email-Store-HTML/t/01basic.t 2005-02-08 15:49:17 UTC (rev 1855) +++ trunk/Email-Store-HTML/t/01basic.t 2005-02-08 15:49:43 UTC (rev 1856) @@ -1,4 +1,4 @@ -use Test::More tests => 13; +use Test::More tests => 14; use File::Slurp; BEGIN { unlink("t/test.db"); } use Email::Store "dbi:SQLite:dbname=t/test.db"; @@ -11,8 +11,13 @@ ok($m, "Got the mail back"); -my ($html, $body, $raw, $scrubbed, $as_text); -ok($html = $m->html, "Got html"); + +my (@html, $html, $body, $raw, $scrubbed, $as_text); +ok(@html = $m->html, "Got html"); +is(@html, 1, "Only one part"); + +$html = shift @html; + ok($body = $m->simple->body, "Got body"); ok($raw = $html->raw, "Got raw"); ok($scrubbed = $html->scrubbed, "Got scrubbed");
Generated at 16:00 on 08 Feb 2005 by mariachi 0.52