rev 1328 - in trunk/mariachi: . lib/Mariachi t/testmess

[prev] [thread] [next] [lurker] [Date index for 2003/08/13]

From: richardc
Subject: rev 1328 - in trunk/mariachi: . lib/Mariachi t/testmess
Date: 09:57 on 13 Aug 2003
Author: richardc
Date: 2003-08-13 09:57:27 +0100 (Wed, 13 Aug 2003)
New Revision: 1328

Added:
   trunk/mariachi/t/testmess/hey.paragraph.TODO
Removed:
   trunk/mariachi/t/testmess/hey.paragraph
Modified:
   trunk/mariachi/Build.PL
   trunk/mariachi/lib/Mariachi/Message.pm
Log:
move over to using Text::Original, which is an extraction of the first_* code

Modified: trunk/mariachi/Build.PL
===================================================================
--- trunk/mariachi/Build.PL	2003-08-13 08:50:21 UTC (rev 1327)
+++ trunk/mariachi/Build.PL	2003-08-13 08:57:27 UTC (rev 1328)
@@ -29,6 +29,7 @@
         'Time::HiRes'     => 0,
         'Template'        => 0,
         'Template::Plugin::Page' => 0,
+        'Text::Original'  => 0,
         'URI::Find::Schemeless::Stricter' => 0,
     },
     create_makefile_pl => 'passthrough',

Modified: trunk/mariachi/lib/Mariachi/Message.pm
===================================================================
--- trunk/mariachi/lib/Mariachi/Message.pm	2003-08-13 08:50:21 UTC (rev 1327)
+++ trunk/mariachi/lib/Mariachi/Message.pm	2003-08-13 08:57:27 UTC (rev 1328)
@@ -4,6 +4,7 @@
 use Class::Accessor::Fast;
 use Digest::MD5 qw(md5_hex);
 use Date::Parse qw(str2time);
+use Text::Original ();
 use Memoize;
 
 use base qw(Class::Accessor::Fast);
@@ -83,140 +84,32 @@
     $self->_header->{ lc $hdr } = shift;
 }
 
-
 =head2 ->first_lines
 
-Returns the a number of lines after the first non blank, none quoted
-line of the body of the email.
+=head2 ->first_paragraph
 
-It will guess at attribution lines and skip them as well.
+=head2 ->first_sentence
 
-It will return super cited lines. This is the super-citers'
-fault, not ours.
+See L<Text::Original>
 
-It won't catch all types of attribution lines;
-
-It can optionally be passed a number of lines to get.
-
 =cut
 
+*first_line = \&first_lines;
 sub first_lines {
     my $self = shift;
-    my $num  = shift || 1;
-
-    return $self->_significant_signal(lines => $num);
+    return Text::Original::first_lines( $self->body, @_ );
 }
 
-*first_line = \&first_lines;
-
-=head2 ->first_paragraph
-
-Returns the first original paragraph of the message
-
-=cut
-
 sub first_paragraph {
     my $self = shift;
-    return $self->_significant_signal(para => 1);
+    return Text::Original::first_paragraph( $self->body );
 }
 
-=head2 ->first_sentence
-
-Returns the first original sentence of the message
-
-=cut
-
 sub first_sentence {
     my $self = shift;
-    my $text = $self->first_paragraph();
-    $text =~ s/([.?!]).*/$1/s;
-    return $text;
+    return Text::Original::first_sentence( $self->body );
 }
 
-sub _significant_signal {
-    my $self = shift;
-    my %opts = @_;
-
-    my $return = "";
-    my $lines  = 0;
-
-    # get all the lines from the main part of the body
-    my @lines = split /$/m, $self->body_sigless;
-
-    # right, find the start of the original content or quoted
-    # content (i.e. skip past the attributation)
-    my $not_started = 1;
-    while (@lines && $not_started) {
-        # next line
-        local $_ = shift @lines;
-        #print "}}$_";
-
-        # blank lines, euurgh
-        next if /^\s*$/;
-        # quotes (we don't count quoted From's)
-        next if /^\s*>(?!From)/;
-        # skip obvious attribution
-        next if /^\s*On (Mon|Tue|Wed|Thu|Fri|Sat|Sun)/i;
-        next if /^\s*.+=? wrote:/i;
-
-        # skip signed messages
-        next if /^\s*-----/;
-        next if /^Hash:/;
-
-        # annoying hi messages (this won't work with i18n)
-        next if /^\s*(?:hello|hi|hey|greetings|salut
-                        |good (?:morning|afternoon|day|evening))
-                 (?:\W.{0,14})?\s*$/ixs;
-
-        # snips
-        next if m~\s*                          # whitespace
-                  [<.=-_*+({\[]*?              # opening bracket
-                  (?:snip|cut|delete|deleted)  # snip?
-                  [^>}\]]*?                    # some words?
-                  [>.=-_*+)}\]]*?              # closing bracket
-                 \s*$                          # end of the line
-                 ~xi;
-
-        # [.. foo ..] or ...foo.. or so on
-        next if m~\s*\[?\.\..*?\.\.]?\s*$~;
-
-        # ... or [...]
-        next if m~\s*\[?\.\.\.]?\s*$~;
-
-        # if we got this far then we've probably got past the
-        # attibutation lines
-        unshift @lines, $_;  # undo the shift
-        undef $not_started;  # and say we've started.
-    }
-
-    # okay, let's _try_ to build up some content then
-    foreach (@lines) {
-        # are we at the end of a paragraph?
-        last if (defined $opts{'para'}  # paragraph mode?
-                 && $opts{'para'}==1
-                 && $lines>0            # got some lines aready?
-                 && /^\s*$/);           # and now we've found a gap?
-
-        # blank lines, euurgh
-        next if /^\s*$/;
-        # quotes (we don't count quoted From's)
-        next if /^\s*>(?!From)/;
-
-        # if we got this far then the line was a useful one
-        $lines++;
-
-        # sort of munged Froms
-        s/^>From/From/;
-        s/^\n+//;
-        $return .= "\n" if $lines>1;
-        $return .= $_;
-        last if (defined $opts{'lines'} && $opts{'lines'}==$lines);
-    }
-    return $return;
-}
-
-memoize('_significant_signal');
-
 =head2 ->body_sigless
 
 Returns the body with the signature (defined as anything

Deleted: trunk/mariachi/t/testmess/hey.paragraph
===================================================================
--- trunk/mariachi/t/testmess/hey.paragraph	2003-08-13 08:50:21 UTC (rev 1327)
+++ trunk/mariachi/t/testmess/hey.paragraph	2003-08-13 08:57:27 UTC (rev 1328)
@@ -1,5 +0,0 @@
-I got mail from a consultant/recruiter in Leicester asking if I knew 
-of any perl gurus looking for work. Since I'm not sure where everyone 
-is, relatively speaking, I figured I'd pass it on. If anyone's 
-interested, get in touch with me and I'll forward your info on, and 
-we'll see where things go from there.__END__
\ No newline at end of file

Copied: trunk/mariachi/t/testmess/hey.paragraph.TODO (from rev 1327, trunk/mariachi/t/testmess/hey.paragraph)

Generated at 13:57 on 01 Jul 2004 by mariachi 0.52