Change 23260 by nicholas@no-spam on 2004/09/04 20:18:50
Integrate:
[ 22997]
Cleanup the main regex in Text::ParseWords and make the
parse_line() routine faster. Add a Unicode test case.
[ 23060]
Failing matches don't reset numbered variables.
Change #22997 could cause Text::ParseWords to loop forever if the
regex did not match. Explicitly return if the match fails.
Affected files ...
... //depot/maint-5.8/perl/lib/Text/ParseWords.pm#6 integrate
... //depot/maint-5.8/perl/lib/Text/ParseWords.t#4 integrate
Differences ...
==== //depot/maint-5.8/perl/lib/Text/ParseWords.pm#6 (text) ====
Index: perl/lib/Text/ParseWords.pm
--- perl/lib/Text/ParseWords.pm#5~23049~ Mon Jul 5 10:46:57 2004
+++ perl/lib/Text/ParseWords.pm Sat Sep 4 13:18:50 2004
@@ -1,7 +1,7 @@
package Text::ParseWords;
use vars qw($VERSION @ISA @EXPORT $PERL_SINGLE_QUOTE);
-$VERSION = "3.22";
+$VERSION = "3.23";
require 5.000;
@@ -53,32 +53,27 @@
use re 'taint'; # if it's tainted, leave it as such
my($delimiter, $keep, $line) = @_;
- my($quote, $quoted, $unquoted, $delim, $word, @pieces);
+ my($word, @pieces);
while (length($line)) {
-
- ($quote, $quoted, undef, $unquoted, $delim, undef) =
- $line =~ m/^(["']) # a $quote
- ((?:\\[\000-\377]|(?!\1)[^\\])*) # and $quoted text
- \1 # followed by the same quote
- ([\000-\377]*) # and the rest
- | # --OR--
- ^((?:\\[\000-\377]|[^\\"'])*?) # an $unquoted text
- (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["']))
- # plus EOL, delimiter, or quote
- ([\000-\377]*) # the rest
- /x; # extended layout
- return() unless( $quote || length($unquoted) || length($delim));
-
- $line = $+;
+ $line =~ s/^(["']) # a $quote
+ ((?:\\.|(?!\1)[^\\])*) # and $quoted text
+ \1 # followed by the same quote
+ | # --OR--
+ ^((?:\\.|[^\\"'])*?) # an $unquoted text
+ (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["']))
+ # plus EOL, delimiter, or quote
+ //xs or return; # extended layout
+ my($quote, $quoted, $unquoted, $delim) = ($1, $2, $3, $4);
+ return() unless( defined($quote) || length($unquoted) || length($delim));
if ($keep) {
$quoted = "$quote$quoted$quote";
}
else {
- $unquoted =~ s/\\([\000-\377])/$1/g;
+ $unquoted =~ s/\\(.)/$1/sg;
if (defined $quote) {
- $quoted =~ s/\\([\000-\377])/$1/g if ($quote eq '"');
+ $quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
$quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
}
}
==== //depot/maint-5.8/perl/lib/Text/ParseWords.t#4 (xtext) ====
Index: perl/lib/Text/ParseWords.t
--- perl/lib/Text/ParseWords.t#3~23049~ Mon Jul 5 10:46:57 2004
+++ perl/lib/Text/ParseWords.t Sat Sep 4 13:18:50 2004
@@ -8,7 +8,7 @@
use warnings;
use Text::ParseWords;
-print "1..20\n";
+print "1..22\n";
@words = shellwords(qq(foo "bar quiz" zoo));
print "not " if $words[0] ne 'foo';
@@ -119,3 +119,16 @@
$result = join('|', parse_line("\t", 0, $string));
print "not " unless $result eq "field1|field2\nstill field2|field3";
print "ok 20\n";
+
+# unicode
+$string = qq{"field1"\x{1234}"field2\\\x{1234}still field2"\x{1234}"field3"};
+$result = join('|', parse_line("\x{1234}", 0, $string));
+print "not " unless $result eq "field1|field2\x{1234}still field2|field3";
+print "ok 21\n";
+
+# missing quote after matching regex used to hang after change #22997
+"1234" =~ /(1)(2)(3)(4)/;
+$string = qq{"missing quote};
+$result = join('|', shellwords($string));
+print "not " unless $result eq "";
+print "ok 22\n";
End of Patch.