[prev] [thread] [next] [lurker] [Date index for 2003/08/13]
Author: richardc Date: 2003-08-13 09:57:27 +0100 (Wed, 13 Aug 2003) New Revision: 1328 Added: trunk/mariachi/t/testmess/hey.paragraph.TODO Removed: trunk/mariachi/t/testmess/hey.paragraph Modified: trunk/mariachi/Build.PL trunk/mariachi/lib/Mariachi/Message.pm Log: move over to using Text::Original, which is an extraction of the first_* code Modified: trunk/mariachi/Build.PL =================================================================== --- trunk/mariachi/Build.PL 2003-08-13 08:50:21 UTC (rev 1327) +++ trunk/mariachi/Build.PL 2003-08-13 08:57:27 UTC (rev 1328) @@ -29,6 +29,7 @@ 'Time::HiRes' => 0, 'Template' => 0, 'Template::Plugin::Page' => 0, + 'Text::Original' => 0, 'URI::Find::Schemeless::Stricter' => 0, }, create_makefile_pl => 'passthrough', Modified: trunk/mariachi/lib/Mariachi/Message.pm =================================================================== --- trunk/mariachi/lib/Mariachi/Message.pm 2003-08-13 08:50:21 UTC (rev 1327) +++ trunk/mariachi/lib/Mariachi/Message.pm 2003-08-13 08:57:27 UTC (rev 1328) @@ -4,6 +4,7 @@ use Class::Accessor::Fast; use Digest::MD5 qw(md5_hex); use Date::Parse qw(str2time); +use Text::Original (); use Memoize; use base qw(Class::Accessor::Fast); @@ -83,140 +84,32 @@ $self->_header->{ lc $hdr } = shift; } - =head2 ->first_lines -Returns the a number of lines after the first non blank, none quoted -line of the body of the email. +=head2 ->first_paragraph -It will guess at attribution lines and skip them as well. +=head2 ->first_sentence -It will return super cited lines. This is the super-citers' -fault, not ours. +See L<Text::Original> -It won't catch all types of attribution lines; - -It can optionally be passed a number of lines to get. - =cut +*first_line = \&first_lines; sub first_lines { my $self = shift; - my $num = shift || 1; - - return $self->_significant_signal(lines => $num); + return Text::Original::first_lines( $self->body, @_ ); } -*first_line = \&first_lines; - -=head2 ->first_paragraph - -Returns the first original paragraph of the message - -=cut - sub first_paragraph { my $self = shift; - return $self->_significant_signal(para => 1); + return Text::Original::first_paragraph( $self->body ); } -=head2 ->first_sentence - -Returns the first original sentence of the message - -=cut - sub first_sentence { my $self = shift; - my $text = $self->first_paragraph(); - $text =~ s/([.?!]).*/$1/s; - return $text; + return Text::Original::first_sentence( $self->body ); } -sub _significant_signal { - my $self = shift; - my %opts = @_; - - my $return = ""; - my $lines = 0; - - # get all the lines from the main part of the body - my @lines = split /$/m, $self->body_sigless; - - # right, find the start of the original content or quoted - # content (i.e. skip past the attributation) - my $not_started = 1; - while (@lines && $not_started) { - # next line - local $_ = shift @lines; - #print "}}$_"; - - # blank lines, euurgh - next if /^\s*$/; - # quotes (we don't count quoted From's) - next if /^\s*>(?!From)/; - # skip obvious attribution - next if /^\s*On (Mon|Tue|Wed|Thu|Fri|Sat|Sun)/i; - next if /^\s*.+=? wrote:/i; - - # skip signed messages - next if /^\s*-----/; - next if /^Hash:/; - - # annoying hi messages (this won't work with i18n) - next if /^\s*(?:hello|hi|hey|greetings|salut - |good (?:morning|afternoon|day|evening)) - (?:\W.{0,14})?\s*$/ixs; - - # snips - next if m~\s* # whitespace - [<.=-_*+({\[]*? # opening bracket - (?:snip|cut|delete|deleted) # snip? - [^>}\]]*? # some words? - [>.=-_*+)}\]]*? # closing bracket - \s*$ # end of the line - ~xi; - - # [.. foo ..] or ...foo.. or so on - next if m~\s*\[?\.\..*?\.\.]?\s*$~; - - # ... or [...] - next if m~\s*\[?\.\.\.]?\s*$~; - - # if we got this far then we've probably got past the - # attibutation lines - unshift @lines, $_; # undo the shift - undef $not_started; # and say we've started. - } - - # okay, let's _try_ to build up some content then - foreach (@lines) { - # are we at the end of a paragraph? - last if (defined $opts{'para'} # paragraph mode? - && $opts{'para'}==1 - && $lines>0 # got some lines aready? - && /^\s*$/); # and now we've found a gap? - - # blank lines, euurgh - next if /^\s*$/; - # quotes (we don't count quoted From's) - next if /^\s*>(?!From)/; - - # if we got this far then the line was a useful one - $lines++; - - # sort of munged Froms - s/^>From/From/; - s/^\n+//; - $return .= "\n" if $lines>1; - $return .= $_; - last if (defined $opts{'lines'} && $opts{'lines'}==$lines); - } - return $return; -} - -memoize('_significant_signal'); - =head2 ->body_sigless Returns the body with the signature (defined as anything Deleted: trunk/mariachi/t/testmess/hey.paragraph =================================================================== --- trunk/mariachi/t/testmess/hey.paragraph 2003-08-13 08:50:21 UTC (rev 1327) +++ trunk/mariachi/t/testmess/hey.paragraph 2003-08-13 08:57:27 UTC (rev 1328) @@ -1,5 +0,0 @@ -I got mail from a consultant/recruiter in Leicester asking if I knew -of any perl gurus looking for work. Since I'm not sure where everyone -is, relatively speaking, I figured I'd pass it on. If anyone's -interested, get in touch with me and I'll forward your info on, and -we'll see where things go from there.__END__ \ No newline at end of file Copied: trunk/mariachi/t/testmess/hey.paragraph.TODO (from rev 1327, trunk/mariachi/t/testmess/hey.paragraph)
Generated at 13:57 on 01 Jul 2004 by mariachi 0.52