PERL CVS PARROT 41 CVS COMMIT PARROT SRC CHARSET C
Date: 1 Mar 2005 08:31:03 -0000

Subject: cvs commit: parrot/src charset.c
From: leo@no-spam (Leopold Toetsch)

cvsuser 05/03/01 00:31:02

Modified: include/parrot charset.h
          pf pf_items.c
          src charset.c
Log:
Strings. Finally. 6 - write charset to pbc; resolve #34285
Revision Changes Path
1.8 +2 -1 parrot/include/parrot/charset.h

Index: charset.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/charset.h,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- charset.h 28 Feb 2005 13:35:43 -0000 1.7
+++ charset.h 1 Mar 2005 08:30:56 -0000 1.8
@@ -1,7 +1,7 @@
/* charset.h
* Copyright: 2004 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: charset.h,v 1.7 2005/02/28 13:35:43 leo Exp $
+ * $Id: charset.h,v 1.8 2005/03/01 08:30:56 leo Exp $
* Overview:
* This is the header for the 8-bit fixed-width encoding
* Data Structure and Algorithms:
@@ -83,6 +83,7 @@
void Parrot_deinit_charsets(Interp *);
INTVAL Parrot_charset_number(Interp *, STRING *charsetname);
STRING* Parrot_charset_name(Interp *, INTVAL);
+const char* Parrot_charset_c_name(Interp *, INTVAL);
INTVAL Parrot_charset_number_of_str(Interp *, STRING *src);
struct _charset {
1.20 +18 -37 parrot/pf/pf_items.c

Index: pf_items.c
===================================================================
RCS file: /cvs/public/parrot/pf/pf_items.c,v
retrieving revision 1.19
retrieving revision 1.20
diff -u -r1.19 -r1.20
--- pf_items.c 27 Feb 2005 09:58:46 -0000 1.19
+++ pf_items.c 1 Mar 2005 08:31:00 -0000 1.20
@@ -1,6 +1,6 @@
/*
Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
-$Id: pf_items.c,v 1.19 2005/02/27 09:58:46 leo Exp $
+$Id: pf_items.c,v 1.20 2005/03/01 08:31:00 leo Exp $
=head1 NAME
@@ -497,16 +497,16 @@
PF_fetch_string(Parrot_Interp interp, struct PackFile *pf, opcode_t **cursor)

{
UINTVAL flags;
- opcode_t representation;
+ opcode_t charset_nr;
size_t size;
- STRING *s;
+ STRING *s, *cs;
int wordsize = pf ? pf->header->wordsize : sizeof(opcode_t);
- const char *encoding_name = NULL;
+ const char *charset_name;
flags = PF_fetch_opcode(pf, cursor);
/* don't let PBC mess our internals - only constant or not */
flags &= (PObj_constant_FLAG | PObj_private7_FLAG);
- representation = PF_fetch_opcode(pf, cursor);
+ charset_nr = PF_fetch_opcode(pf, cursor);
/* These may need to be separate */
size = (size_t)PF_fetch_opcode(pf, cursor);
@@ -514,41 +514,14 @@
/* #define TRACE_PACKFILE 1 */
#if TRACE_PACKFILE
PIO_eprintf(NULL, "PF_fetch_string(): flags are 0x%04x...\n", flags);
- PIO_eprintf(NULL, "PF_fetch_string(): representation is %ld...\n",
- representation);
+ PIO_eprintf(NULL, "PF_fetch_string(): charset_nr is %ld...\n",
+ charset_nr);
PIO_eprintf(NULL, "PF_fetch_string(): size is %ld...\n", size);
#endif
- if( size == 0 && representation == enum_stringrep_unknown )
- {
- representation = enum_stringrep_one;
- }
-
- /* check if we need to worry about byte order */
- if( (representation == enum_stringrep_one)
- || (pf->header->byteorder == PARROT_BIGENDIAN) ) /* byte order ok*/
- {
- encoding_name =
- string_primary_encoding_for_representation(interp,
- representation);
- }
- else /* byte order mismatch */
- {
- /* ICU has special encoding names to represent the byte-swapped
- case, so we don't need to do that logic ourselves. That is,
- ICU will figure out if we mean UTF-16BE or UTF-16LE, etc. */
-
- if( representation == enum_stringrep_two )
- {
- encoding_name = "UTF16_OppositeEndian";
- }
- else /* representation == enum_stringrep_four */
- {
- encoding_name = "UTF32_OppositeEndian";
- }
- }
- s = string_make(interp, *cursor, size, encoding_name, flags);
+ charset_name = Parrot_charset_c_name(interp, charset_nr);
+ s = string_make(interp, *cursor, size, charset_name, flags);
#if TRACE_PACKFILE
PIO_eprintf(NULL, "PF_fetch_string(): string is: ");
@@ -591,7 +564,15 @@
}
*cursor++ = PObj_get_FLAGS(s); /* only constant_FLAG and private7 */
- *cursor++ = enum_stringrep_one;
+ /*
+ * TODO as soon as we have dynamically loadable charsets
+ * we have to store the charset name, not the number
+ *
+ * TODO encoding
+ *
+ * see also PF_fetch_string
+ */
+ *cursor++ = Parrot_charset_number_of_str(NULL, s);
*cursor++ = s->bufused;
/* Switch to char * since rest of string is addressed by
1.8 +9 -1 parrot/src/charset.c

Index: charset.c
===================================================================
RCS file: /cvs/public/parrot/src/charset.c,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- charset.c 28 Feb 2005 15:10:57 -0000 1.7
+++ charset.c 1 Mar 2005 08:31:02 -0000 1.8
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: charset.c,v 1.7 2005/02/28 15:10:57 leo Exp $
+$Id: charset.c,v 1.8 2005/03/01 08:31:02 leo Exp $
=head1 NAME
@@ -126,6 +126,14 @@
return all_charsets->set[number_of_charset].name;
}
+const char *
+Parrot_charset_c_name(Interp *interpreter, INTVAL number_of_charset)
+{
+ if (number_of_charset >= all_charsets->n_charsets)
+ return NULL;
+ return all_charsets->set[number_of_charset].charset->name;
+}
+
static INTVAL register_charset(Interp *interpreter, const char *charsetname,
CHARSET *charset)