PERL PERL5 CHANGES 13 CHANGE 23260 INTEGRATE
Date: Sat, 4 Sep 2004 13:45:00 -0700

Subject: Change 23260: Integrate:
From: nick@no-spam (Nicholas Clark)

Change 23260 by nicholas@no-spam on 2004/09/04 20:18:50

Integrate:
[ 22997]
Clean up the main regex in Text::ParseWords and make the parse_line() routine faster. Add a Unicode test case.

[ 23060]
Failing matches don't reset numbered variables.
Change #22997 could cause Text::ParseWords to loop forever if the regex did not match. Explicitly return if the match fails.

Affected files ...

... //depot/maint-5.8/perl/lib/Text/ParseWords.pm#6 integrate
... //depot/maint-5.8/perl/lib/Text/ParseWords.t#4 integrate
Differences ...

==== //depot/maint-5.8/perl/lib/Text/ParseWords.pm#6 (text) ====
Index: perl/lib/Text/ParseWords.pm --- perl/lib/Text/ParseWords.pm#5~23049~ Mon Jul 5 10:46:57 2004
+++ perl/lib/Text/ParseWords.pm Sat Sep 4 13:18:50 2004
@@ -1,7 +1,7 @@ package Text::ParseWords;
use vars qw($VERSION @no-spam @no-spam $PERL_SINGLE_QUOTE);
-$VERSION = "3.22";
+$VERSION = "3.23";
require 5.000;
@@ -53,32 +53,27 @@ use re 'taint'; # if it's tainted, leave it as such my($delimiter, $keep, $line) = @no-spam - my($quote, $quoted, $unquoted, $delim, $word, @no-spam + my($word, @no-spam while (length($line)) {
-
- ($quote, $quoted, undef, $unquoted, $delim, undef) =
- $line =~ m/^(["']) # a $quote - ((?:\\[\000-\377]|(?!\1)[^\\])*) # and $quoted text - \1 # followed by the same quote - ([\000-\377]*) # and the rest - | # --OR--
- ^((?:\\[\000-\377]|[^\\"'])*?) # an $unquoted text - (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["'])) - # plus EOL, delimiter, or quote - ([\000-\377]*) # the rest - /x; # extended layout - return() unless( $quote || length($unquoted) || length($delim));
-
- $line = $+;
+ $line =~ s/^(["']) # a $quote + ((?:\\.|(?!\1)[^\\])*) # and $quoted text + \1 # followed by the same quote + | # --OR--
+ ^((?:\\.|[^\\"'])*?) # an $unquoted text + (\Z(?!\n)|(?-x:$delimiter)|(?!^)(?=["'])) + # plus EOL, delimiter, or quote + //xs or return; # extended layout + my($quote, $quoted, $unquoted, $delim) = ($1, $2, $3, $4);
+ return() unless( defined($quote) || length($unquoted) || length($delim));
if ($keep) {
$quoted = "$quote$quoted$quote";
}
else {
- $unquoted =~ s/\\([\000-\377])/$1/g;
+ $unquoted =~ s/\\(.)/$1/sg;
if (defined $quote) {
- $quoted =~ s/\\([\000-\377])/$1/g if ($quote eq '"');
+ $quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
$quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
}
}

==== //depot/maint-5.8/perl/lib/Text/ParseWords.t#4 (xtext) ====
Index: perl/lib/Text/ParseWords.t --- perl/lib/Text/ParseWords.t#3~23049~ Mon Jul 5 10:46:57 2004
+++ perl/lib/Text/ParseWords.t Sat Sep 4 13:18:50 2004
@@ -8,7 +8,7 @@ use warnings;
use Text::ParseWords;
-print "1..20\n";
+print "1..22\n";
@words = shellwords(qq(foo "bar quiz" zoo));
print "not " if $words[0] ne 'foo';
@@ -119,3 +119,16 @@ $result = join('|', parse_line("\t", 0, $string));
print "not " unless $result eq "field1|field2\nstill field2|field3";
print "ok 20\n";
+
+# unicode +$string = qq{"field1"\x{1234}"field2\\\x{1234}still field2"\x{1234}"field3"};
+$result = join('|', parse_line("\x{1234}", 0, $string));
+print "not " unless $result eq "field1|field2\x{1234}still field2|field3";
+print "ok 21\n";
+
+# missing quote after matching regex used to hang after change #22997
+"1234" =~ /(1)(2)(3)(4)/;
+$string = qq{"missing quote};
+$result = join('|', shellwords($string));
+print "not " unless $result eq "";
+print "ok 22\n";
End of Patch.