PERL CVS PARROT 45 CVS COMMIT PARROT T OP STRING CS T
Date: 1 Mar 2005 15:41:31 -0000

Subject: cvs commit: parrot/t/op string_cs.t
From: leo@no-spam (Leopold Toetsch)

cvsuser 05/03/01 07:41:31

Modified: charset ascii.c ascii.h iso-8859-1.c
          src charset.c
          t/op string_cs.t
Log:
Strings. Finally. 9 - more charset converters

Revision Changes Path
1.14 +60 -15 parrot/charset/ascii.c

Index: ascii.c
===================================================================
RCS file: /cvs/public/parrot/charset/ascii.c,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- ascii.c 1 Mar 2005 14:19:45 -0000 1.13
+++ ascii.c 1 Mar 2005 15:41:25 -0000 1.14
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: ascii.c,v 1.13 2005/03/01 14:19:45 leo Exp $
+$Id: ascii.c,v 1.14 2005/03/01 15:41:25 leo Exp $
=head1 NAME
@@ -95,33 +95,43 @@
offset, count, dest_string);
}
+
static STRING *
-to_charset(Interp *interpreter, STRING *src, CHARSET *new_charset, STRING *dest)

+from_charset(Interp *interpreter, STRING *source_string, STRING *dest)
{
- internal_exception(UNIMPLEMENTED, "to_charset for ascii not implemented");

+ internal_exception(UNIMPLEMENTED, "Can't do this yet");
return NULL;
}
-
static STRING *
-to_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
+from_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
{
- internal_exception(UNIMPLEMENTED, "to_unicode for ascii not implemented");

+ internal_exception(UNIMPLEMENTED, "Can't do this yet");
return NULL;
}
-static STRING *
-from_charset(Interp *interpreter, STRING *source_string, STRING *dest)
+STRING *
+ascii_to_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
{
- internal_exception(UNIMPLEMENTED, "Can't do this yet");
+ internal_exception(UNIMPLEMENTED,
+ "to_unicode for iso-8859-1 not implemented");
return NULL;
}
-static STRING *
-from_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
+STRING *
+ascii_to_charset(Interp *interpreter, STRING *src, CHARSET *new_charset, STRING *dest)

{
- internal_exception(UNIMPLEMENTED, "Can't do this yet");
- return NULL;
+ charset_converter_t conversion_func;
+
+ if ((conversion_func = Parrot_find_charset_converter(interpreter,
+ src->charset, new_charset))) {
+ return conversion_func(interpreter, src, dest);
+ }
+ else {
+ STRING *res = ascii_to_unicode(interpreter, src, dest);
+ return new_charset->from_charset(interpreter, res, dest);
+
+ }
}
/* A noop. can't compose ascii */
@@ -506,8 +516,8 @@
ascii_get_graphemes,
ascii_get_graphemes_inplace,
set_graphemes,
- to_charset,
- to_unicode,
+ ascii_to_charset,
+ ascii_to_unicode,
from_charset,
from_unicode,
compose,
@@ -555,6 +565,41 @@
return return_set;
}
+STRING *
+charset_cvt_ascii_to_binary(Interp *interpreter, STRING *src, STRING *dest)
+{
+ UINTVAL offs, c;
+ if (dest) {
+ Parrot_reallocate_string(interpreter, dest, src->strlen);
+ dest->bufused = src->bufused;
+ dest->strlen = src->strlen;
+ for (offs = 0; offs < src->strlen; ++offs) {
+ c = ENCODING_GET_BYTE(interpreter, src, offs);
+ ENCODING_SET_BYTE(interpreter, dest, offs, c);
+ }
+ return dest;
+ }
+ src->charset = Parrot_binary_charset_ptr;
+ return src;
+}
+
+STRING *
+charset_cvt_ascii_to_iso_8859_1(Interp *interpreter, STRING *src, STRING *dest)

+{
+ UINTVAL offs, c;
+ if (dest) {
+ Parrot_reallocate_string(interpreter, dest, src->strlen);
+ dest->bufused = src->bufused;
+ dest->strlen = src->strlen;
+ for (offs = 0; offs < src->strlen; ++offs) {
+ c = ENCODING_GET_BYTE(interpreter, src, offs);
+ ENCODING_SET_BYTE(interpreter, dest, offs, c);
+ }
+ return dest;
+ }
+ src->charset = Parrot_iso_8859_1_charset_ptr;
+ return src;
+}
/*
* Local variables:
* c-indentation-style: bsd

1.11 +5 -1 parrot/charset/ascii.h

Index: ascii.h
===================================================================
RCS file: /cvs/public/parrot/charset/ascii.h,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- ascii.h 1 Mar 2005 14:19:45 -0000 1.10
+++ ascii.h 1 Mar 2005 15:41:25 -0000 1.11
@@ -1,7 +1,7 @@
/* ascii.h
* Copyright: 2004 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: ascii.h,v 1.10 2005/03/01 14:19:45 leo Exp $
+ * $Id: ascii.h,v 1.11 2005/03/01 15:41:25 leo Exp $
* Overview:
* This is the header for the ascii charset functions
* Data Structure and Algorithms:
@@ -39,6 +39,8 @@
INTVAL ascii_cs_rindex(Interp *, const STRING *source_string,
const STRING *search_string, UINTVAL offset);
size_t ascii_compute_hash(Interp *, STRING *source_string);
+STRING * ascii_to_unicode(Interp *, STRING *source_string, STRING *dest);
+STRING * ascii_to_charset(Interp *, STRING *src, CHARSET *new_cs, STRING *dest);

static void compose(Interp *, STRING *source_string);
static void decompose(Interp *, STRING *source_string);
@@ -63,6 +65,8 @@
static INTVAL find_not_punctuation(Interp *, STRING *source_string, UINTVAL offset);

CHARSET *Parrot_charset_ascii_init(Interp *);
+STRING *charset_cvt_ascii_to_binary(Interp *, STRING *src, STRING *dest);
+STRING *charset_cvt_ascii_to_iso_8859_1(Interp *, STRING *src, STRING *dest);

#endif /* PARROT_CHARSET_ASCII_H_GUARD */
/*
1.11 +3 -27 parrot/charset/iso-8859-1.c

Index: iso-8859-1.c
===================================================================
RCS file: /cvs/public/parrot/charset/iso-8859-1.c,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- iso-8859-1.c 1 Mar 2005 14:19:45 -0000 1.10
+++ iso-8859-1.c 1 Mar 2005 15:41:25 -0000 1.11
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: iso-8859-1.c,v 1.10 2005/03/01 14:19:45 leo Exp $
+$Id: iso-8859-1.c,v 1.11 2005/03/01 15:41:25 leo Exp $
=head1 NAME
@@ -81,30 +81,6 @@
}
-static STRING *
-to_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
-{
- internal_exception(UNIMPLEMENTED,
- "to_unicode for iso-8859-1 not implemented");
- return NULL;
-}
-
-static STRING *
-to_charset(Interp *interpreter, STRING *src, CHARSET *new_charset, STRING *dest)

-{
- charset_converter_t conversion_func;
-
- if ((conversion_func = Parrot_find_charset_converter(interpreter,
- src->charset, new_charset))) {
- return conversion_func(interpreter, src, dest);
- }
- else {
- STRING *res = to_unicode(interpreter, src, dest);
- return new_charset->from_charset(interpreter, res, dest);
-
- }
-}
-
/* A noop. can't compose iso-8859-1 */
static void compose(Interp *interpreter, STRING *source_string)
@@ -369,8 +345,8 @@
ascii_get_graphemes,
ascii_get_graphemes_inplace,
set_graphemes,
- to_charset,
- to_unicode,
+ ascii_to_charset,
+ ascii_to_unicode,
from_charset,
from_unicode,
compose,
1.10 +31 -5 parrot/src/charset.c

Index: charset.c
===================================================================
RCS file: /cvs/public/parrot/src/charset.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- charset.c 1 Mar 2005 14:19:48 -0000 1.9
+++ charset.c 1 Mar 2005 15:41:26 -0000 1.10
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: charset.c,v 1.9 2005/03/01 14:19:48 leo Exp $
+$Id: charset.c,v 1.10 2005/03/01 15:41:26 leo Exp $
=head1 NAME
@@ -15,6 +15,7 @@
#define PARROT_NO_EXTERN_CHARSET_PTRS
#include "parrot/parrot.h"
#include "../charset/iso-8859-1.h"
+#include "../charset/ascii.h"
CHARSET *Parrot_iso_8859_1_charset_ptr;
CHARSET *Parrot_binary_charset_ptr;
@@ -59,6 +60,8 @@
n = all_charsets->n_charsets;
for (i = 0; i < n; ++i) {
+ if (all_charsets->set[i].n_converters)
+ mem_sys_free(all_charsets->set[i].to_converters);
mem_sys_free(all_charsets->set[i].charset);
}
mem_sys_free(all_charsets->set);
@@ -174,6 +177,24 @@
return 1;
}
+static void
+register_static_converters(Interp *interpreter)
+{
+ Parrot_register_charset_converter(interpreter,
+ Parrot_iso_8859_1_charset_ptr, Parrot_ascii_charset_ptr,
+ charset_cvt_iso_8859_1_to_ascii);
+ Parrot_register_charset_converter(interpreter,
+ Parrot_iso_8859_1_charset_ptr, Parrot_binary_charset_ptr,
+ charset_cvt_ascii_to_binary);
+
+ Parrot_register_charset_converter(interpreter,
+ Parrot_ascii_charset_ptr, Parrot_binary_charset_ptr,
+ charset_cvt_ascii_to_binary);
+ Parrot_register_charset_converter(interpreter,
+ Parrot_ascii_charset_ptr, Parrot_iso_8859_1_charset_ptr,
+ charset_cvt_ascii_to_iso_8859_1);
+}
+
INTVAL Parrot_register_charset(Interp *interpreter, const char *charsetname,
CHARSET *charset)
@@ -199,11 +220,16 @@
return register_charset(interpreter, charsetname, charset);
}
if (!strcmp("ascii", charsetname)) {
+ INTVAL result;
+
Parrot_ascii_charset_ptr = charset;
- Parrot_register_charset_converter(interpreter,
- Parrot_iso_8859_1_charset_ptr, charset,
- charset_cvt_iso_8859_1_to_ascii);
- return register_charset(interpreter, charsetname, charset);
+ result = register_charset(interpreter, charsetname, charset);
+ /*
+ * ascii is currently the last charset - so we can
+ * now install charset converters
+ */
+ register_static_converters(interpreter);
+ return result;
}
return 0;
}
1.7 +98 -2 parrot/t/op/string_cs.t

Index: string_cs.t
===================================================================
RCS file: /cvs/public/parrot/t/op/string_cs.t,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- string_cs.t 1 Mar 2005 14:19:49 -0000 1.6
+++ string_cs.t 1 Mar 2005 15:41:31 -0000 1.7
@@ -1,6 +1,6 @@
#! perl -w
# Copyright: 2001-2004 The Perl Foundation. All Rights Reserved.
-# $Id: string_cs.t,v 1.6 2005/03/01 14:19:49 leo Exp $
+# $Id: string_cs.t,v 1.7 2005/03/01 15:41:31 leo Exp $
=head1 NAME
@@ -16,7 +16,7 @@
=cut
-use Parrot::Test tests => 20;
+use Parrot::Test tests => 26;
use Test::More;
output_is( <<'CODE', <<OUTPUT, "basic syntax" );
@@ -305,3 +305,99 @@
ascii
OUTPUT

+output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i iso-8859-1 to binary");
+ set S0, "abc"
+ find_charset I0, "binary"
+ trans_charset S1, S0, I0
+ print S1
+ print "\n"
+ charset I0, S1
+ charsetname S2, I0
+ print S2
+ print "\n"
+ end
+CODE
+abc
+binary
+OUTPUT
+
+output_is( <<'CODE', <<OUTPUT, "trans_charset_s_i iso-8859-1 to binary");
+ set S1, "abc"
+ find_charset I0, "binary"
+ trans_charset S1, I0
+ print S1
+ print "\n"
+ charset I0, S1
+ charsetname S2, I0
+ print S2
+ print "\n"
+ end
+CODE
+abc
+binary
+OUTPUT
+
+output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i ascii to binary");
+ set S0, ascii:"abc"
+ find_charset I0, "binary"
+ trans_charset S1, S0, I0
+ print S1
+ print "\n"
+ charset I0, S1
+ charsetname S2, I0
+ print S2
+ print "\n"
+ end
+CODE
+abc
+binary
+OUTPUT
+
+output_is( <<'CODE', <<OUTPUT, "trans_charset_s_i ascii to binary");
+ set S1, ascii:"abc"
+ find_charset I0, "binary"
+ trans_charset S1, I0
+ print S1
+ print "\n"
+ charset I0, S1
+ charsetname S2, I0
+ print S2
+ print "\n"
+ end
+CODE
+abc
+binary
+OUTPUT
+
+output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i ascii to iso-8859-1");
+ set S0, ascii:"abc"
+ find_charset I0, "iso-8859-1"
+ trans_charset S1, S0, I0
+ print S1
+ print "\n"
+ charset I0, S1
+ charsetname S2, I0
+ print S2
+ print "\n"
+ end
+CODE
+abc
+iso-8859-1
+OUTPUT
+
+output_is( <<'CODE', <<OUTPUT, "trans_charset_s_i ascii to iso-8859-1");
+ set S1, ascii:"abc"
+ find_charset I0, "iso-8859-1"
+ trans_charset S1, I0
+ print S1
+ print "\n"
+ charset I0, S1
+ charsetname S2, I0
+ print S2
+ print "\n"
+ end
+CODE
+abc
+iso-8859-1
+OUTPUT
+