PERL CVS PARROT 42 CVS COMMIT PARROT CHARSET ASCII C ASCII H BINARY C BINARY H ISO 8859 1 C ISO 8859 1 H
Date: 1 Mar 2005 11:06:27 -0000

Subject: cvs commit: parrot/charset ascii.c ascii.h binary.c binary.h iso-8859-1.c iso-8859-1.h
From: leo@no-spam (Leopold Toetsch)

cvsuser 05/03/01 03:06:27

Modified: charset ascii.c ascii.h binary.c binary.h iso-8859-1.c iso-8859-1.h Log:
Strings. Finally. 7 - binary charset; sanity checks
* fill blanks in binary charset
* some ascii/iso-8859-1 functions assume fixed_8 encoding
  assert this

Revision Changes Path
1.12 +88 -31 parrot/charset/ascii.c

Index: ascii.c
===================================================================
RCS file: /cvs/public/parrot/charset/ascii.c,v retrieving revision 1.11
retrieving revision 1.12
diff -u -r1.11 -r1.12
--- ascii.c 28 Feb 2005 18:01:21 -0000 1.11
+++ ascii.c 1 Mar 2005 11:06:26 -0000 1.12
@@ -1,6 +1,6 @@ /*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: ascii.c,v 1.11 2005/02/28 18:01:21 leo Exp $
+$Id: ascii.c,v 1.12 2005/03/01 11:06:26 leo Exp $
=head1 NAME @@ -17,6 +17,7 @@ #include "parrot/parrot.h"
#include "ascii.h"
+#include <assert.h>
/* The encoding we prefer, given a choice */
static ENCODING *preferred_encoding;
@@ -51,7 +52,7 @@ {
for (; start < string->strlen; start++) {
- if (table[ENCODING_GET_CODEPOINT(interpreter, string, start)] & type) {

+ if (table[ENCODING_GET_BYTE(interpreter, string, start)] & type) {
return start;
}
}
@@ -62,20 +63,12 @@ ascii_find_not_thing(Interp *interpreter, STRING *string, UINTVAL start,
unsigned char type, const unsigned char *table)
{
- INTVAL retval = -1;
- INTVAL found = 0;
-
for (; start < string->strlen; start++) {
- if (!(table[ENCODING_GET_CODEPOINT(interpreter, string, start)]
- &type)) {
- found = 1;
- break;
+ if (!(table[ENCODING_GET_BYTE(interpreter, string, start)] & type)) {

+ return start;
}
}
- if (found) {
- retval = start;
- }
- return retval;
+ return -1;
}
STRING *
@@ -157,6 +150,7 @@ return;
}
+ assert(source_string->encoding == Parrot_fixed_8_encoding_ptr);
Parrot_unmake_COW(interpreter, source_string);
buffer = source_string->strstart;
for (offset = 0; offset < source_string->strlen; offset++) {
@@ -172,6 +166,7 @@ if (!source_string->strlen) {
return;
}
+ assert(source_string->encoding == Parrot_fixed_8_encoding_ptr);
Parrot_unmake_COW(interpreter, source_string);
buffer = source_string->strstart;
for (offset = 0; offset < source_string->strlen; offset++) {
@@ -187,6 +182,7 @@ if (!source_string->strlen) {
return;
}
+ assert(source_string->encoding == Parrot_fixed_8_encoding_ptr);
Parrot_unmake_COW(interpreter, source_string);
buffer = source_string->strstart;
buffer[0] = toupper(buffer[0]);
@@ -202,6 +198,7 @@ if (!source_string->strlen) {
return;
}
+ assert(source_string->encoding == Parrot_fixed_8_encoding_ptr);
Parrot_unmake_COW(interpreter, source_string);
buffer = source_string->strstart;
buffer[0] = toupper(buffer[0]);
@@ -214,6 +211,7 @@ if (!source_string->strlen) {
return;
}
+ assert(source_string->encoding == Parrot_fixed_8_encoding_ptr);
Parrot_unmake_COW(interpreter, source_string);
buffer = source_string->strstart;
buffer[0] = toupper(buffer[0]);
@@ -226,29 +224,83 @@ if (!source_string->strlen) {
return;
}
+
+ assert(source_string->encoding == Parrot_fixed_8_encoding_ptr);
Parrot_unmake_COW(interpreter, source_string);
buffer = source_string->strstart;
buffer[0] = toupper(buffer[0]);
}
-static INTVAL -compare(Interp *interpreter, STRING *lhs, STRING *rhs)
+INTVAL +ascii_compare(Interp *interpreter, STRING *lhs, STRING *rhs)
{
- return 0;
+ INTVAL retval;
+ UINTVAL offs, l_len, r_len, min_len;
+
+ l_len = lhs->strlen;
+ r_len = rhs->strlen;
+ min_len = l_len > r_len ? r_len : l_len;
+
+ if (lhs->encoding == Parrot_fixed_8_encoding_ptr &&
+ rhs->encoding == Parrot_fixed_8_encoding_ptr) {
+ retval = memcmp(lhs->strstart, rhs->strstart, min_len);
+ }
+ else {
+ UINTVAL cl, cr;
+ for (offs = 0; offs < min_len; ++offs) {
+ cl = ENCODING_GET_BYTE(interpreter, lhs, offs);
+ cr = ENCODING_GET_BYTE(interpreter, rhs, offs);
+ retval = cl - cr;
+ if (retval)
+ break;
+ }
+ }
+ if (!retval) {
+ if (l_len < r_len) {
+ return -1;
+ }
+ if (l_len > r_len) {
+ return 1;
+ }
+ if (l_len == r_len) {
+ return 0;
+ }
+ }
+ retval = retval > 0 ? 1 : -1;
+ return retval;
}
-static INTVAL -cs_index(Interp *interpreter, const STRING *source_string,
+INTVAL +ascii_cs_index(Interp *interpreter, const STRING *source_string,
const STRING *search_string, UINTVAL offset)
{
- return -1;
+ UINTVAL base_size, search_size;
+ char *base, *search;
+ INTVAL retval;
+ if (source_string->charset != search_string->charset) {
+ internal_exception(UNIMPLEMENTED, "Cross-charset index not supported");

+ }
+
+ assert(source_string->encoding == Parrot_fixed_8_encoding_ptr);
+ retval = Parrot_byte_index(interpreter, source_string,
+ search_string, offset);
+ return retval;
}
-static INTVAL -cs_rindex(Interp *interpreter, const STRING *source_string,
- const STRING *search_string, UINTVAL offset)
-{
- return -1;
+INTVAL +ascii_cs_rindex(Interp *interpreter, const STRING *source_string,
+ const STRING *search_string, UINTVAL offset) {
+ UINTVAL base_size, search_size;
+ char *base, *search;
+ INTVAL retval;
+ if (source_string->charset != search_string->charset) {
+ internal_exception(UNIMPLEMENTED, "Cross-charset index not supported");

+ }
+
+ assert(source_string->encoding == Parrot_fixed_8_encoding_ptr);
+ retval = Parrot_byte_rindex(interpreter, source_string,
+ search_string, offset);
+ return retval;
}
static UINTVAL @@ -420,6 +472,7 @@ return ascii_find_word_boundary(interpreter, source_string,
offset, typetable);
}
+
static STRING *
string_from_codepoint(Interp *interpreter, UINTVAL codepoint)
{
@@ -429,14 +482,18 @@ return return_string;
}
-static size_t -compute_hash(Interp *interpreter, STRING *source_string)
+/*
+ * TODO pass in the Hash's seed value as initial hashval + */
+size_t +ascii_compute_hash(Interp *interpreter, STRING *source_string)
{
- size_t hashval;
+ size_t hashval = 0;
char *buffptr = (char *)source_string->strstart;
UINTVAL len = source_string->strlen;
+ assert(source_string->encoding == Parrot_fixed_8_encoding_ptr);
while (len--) {
hashval += hashval << 5;
hashval += *buffptr++;
@@ -466,9 +523,9 @@ upcase_first,
downcase_first,
titlecase_first,
- compare,
- cs_index,
- cs_rindex,
+ ascii_compare,
+ ascii_cs_index,
+ ascii_cs_rindex,
validate,
is_wordchar,
find_wordchar,
@@ -487,7 +544,7 @@ ascii_find_not_newline,
find_word_boundary,
string_from_codepoint,
- compute_hash,
+ ascii_compute_hash,
{NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}
};
1.9 +8 -5 parrot/charset/ascii.h Index: ascii.h ===================================================================
RCS file: /cvs/public/parrot/charset/ascii.h,v retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- ascii.h 28 Feb 2005 17:17:51 -0000 1.8
+++ ascii.h 1 Mar 2005 11:06:26 -0000 1.9
@@ -1,7 +1,7 @@ /* ascii.h * Copyright: 2004 The Perl Foundation. All Rights Reserved.
* CVS Info - * $Id: ascii.h,v 1.8 2005/02/28 17:17:51 leo Exp $
+ * $Id: ascii.h,v 1.9 2005/03/01 11:06:26 leo Exp $
* Overview:
* This is the header for the ascii charset functions * Data Structure and Algorithms:
@@ -32,6 +32,13 @@ INTVAL ascii_find_not_newline(Interp *, STRING *source_string, UINTVAL offset);

INTVAL ascii_find_word_boundary(Interp *, STRING *source_string,
UINTVAL offset, const unsigned char *typetable);
+INTVAL ascii_compare(Interp *, STRING *lhs, STRING *rhs);
+INTVAL ascii_compare(Interp *, STRING *lhs, STRING *rhs);
+INTVAL ascii_cs_index(Interp *, const STRING *source_string,
+ const STRING *search_string, UINTVAL offset);
+INTVAL ascii_cs_rindex(Interp *, const STRING *source_string,
+ const STRING *search_string, UINTVAL offset);
+size_t ascii_compute_hash(Interp *, STRING *source_string);
static void set_graphemes(Interp *, STRING *source_string, UINTVAL offset, UINTVAL replace_count, STRING *insert_string);

static void to_charset(Interp *, STRING *source_string, CHARSET *new_charset);

@@ -45,9 +52,6 @@ static void upcase_first(Interp *, STRING *source_string);
static void downcase_first(Interp *, STRING *source_string);
static void titlecase_first(Interp *, STRING *source_string);
-static INTVAL compare(Interp *, STRING *lhs, STRING *rhs);
-static INTVAL cs_index(Interp *, const STRING *source_string, const STRING *search_string, UINTVAL offset);

-static INTVAL cs_rindex(Interp *, const STRING *source_string, const STRING *search_string, UINTVAL offset);

static UINTVAL validate(Interp *, STRING *source_string);
static INTVAL is_wordchar(Interp *, STRING *source_string, UINTVAL offset);
static INTVAL find_wordchar(Interp *, STRING *source_string, UINTVAL offset);

@@ -61,7 +65,6 @@ static INTVAL is_punctuation(Interp *, STRING *source_string, UINTVAL offset);

static INTVAL find_punctuation(Interp *, STRING *source_string, UINTVAL offset);

static INTVAL find_not_punctuation(Interp *, STRING *source_string, UINTVAL offset);

-static size_t compute_hash(Interp *, STRING *source_string);
CHARSET *Parrot_charset_ascii_init(Interp *);
1.9 +21 -43 parrot/charset/binary.c Index: binary.c ===================================================================
RCS file: /cvs/public/parrot/charset/binary.c,v retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- binary.c 28 Feb 2005 08:29:30 -0000 1.8
+++ binary.c 1 Mar 2005 11:06:26 -0000 1.9
@@ -1,6 +1,6 @@ /*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: binary.c,v 1.8 2005/02/28 08:29:30 leo Exp $
+$Id: binary.c,v 1.9 2005/03/01 11:06:26 leo Exp $
=head1 NAME @@ -16,24 +16,17 @@ #include "parrot/parrot.h"
#include "binary.h"
+#include "ascii.h"
/* The encoding we prefer, given a choice */
static ENCODING *preferred_encoding;
-static STRING *
-get_graphemes(Interp *interpreter, STRING *source_string,
- UINTVAL offset, UINTVAL count)
-{
- return ENCODING_GET_BYTES(interpreter, source_string, offset, count);
-}
+#ifdef EXCEPTION +# undef EXCEPTION +#endif -static STRING *
-get_graphemes_inplace(Interp *interpreter, STRING *source_string,
- STRING *dest_string, UINTVAL offset, UINTVAL count)
-{
- return ENCODING_GET_BYTES_INPLACE(interpreter, source_string,
- offset, count, dest_string);
-}
+#define EXCEPTION(err, str) \
+ real_exception(interpreter, NULL, err, str)
static void set_graphemes(Interp *interpreter, STRING *source_string,
@@ -92,37 +85,37 @@ static void upcase(Interp *interpreter, STRING *source_string)
{
- internal_exception(INVALID_CHARTYPE, "Can't upcase binary data");
+ EXCEPTION(INVALID_CHARTYPE, "Can't upcase binary data");
}
static void downcase(Interp *interpreter, STRING *source_string)
{
- internal_exception(INVALID_CHARTYPE, "Can't downcase binary data");
+ EXCEPTION(INVALID_CHARTYPE, "Can't downcase binary data");
}
static void titlecase(Interp *interpreter, STRING *source_string)
{
- internal_exception(INVALID_CHARTYPE, "Can't titlecase binary data");
+ EXCEPTION(INVALID_CHARTYPE, "Can't titlecase binary data");
}
static void upcase_first(Interp *interpreter, STRING *source_string)
{
- internal_exception(INVALID_CHARTYPE, "Can't upcase binary data");
+ EXCEPTION(INVALID_CHARTYPE, "Can't upcase binary data");
}
static void downcase_first(Interp *interpreter, STRING *source_string)
{
- internal_exception(INVALID_CHARTYPE, "Can't downcase binary data");
+ EXCEPTION(INVALID_CHARTYPE, "Can't downcase binary data");
}
static void titlecase_first(Interp *interpreter, STRING *source_string)
{
- internal_exception(INVALID_CHARTYPE, "Can't titlecase binary data");
+ EXCEPTION(INVALID_CHARTYPE, "Can't titlecase binary data");
}
static INTVAL @@ -168,7 +161,7 @@ static INTVAL find_not_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)

{
- return offset;
+ return -1;
}
static INTVAL @@ -185,7 +178,7 @@ static INTVAL find_not_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return offset;
+ return -1;
}
static INTVAL @@ -203,7 +196,7 @@ static INTVAL find_not_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
{
- return offset;
+ return -1;
}
static INTVAL @@ -222,7 +215,7 @@ find_not_punctuation(Interp *interpreter, STRING *source_string,
UINTVAL offset)
{
- return offset;
+ return -1;
}
static INTVAL @@ -240,7 +233,7 @@ static INTVAL find_not_newline(Interp *interpreter, STRING *source_string, UINTVAL offset)
{
- return offset;
+ return -1;
}
static INTVAL @@ -258,21 +251,6 @@ return return_string;
}
-static size_t -compute_hash(Interp *interpreter, STRING *source_string)
-{
- size_t hashval;
-
- char *buffptr = (char *)source_string->strstart;
- UINTVAL len = source_string->strlen;
-
- while (len--) {
- hashval += hashval << 5;
- hashval += *buffptr++;
- }
- return hashval;
-}
-
CHARSET *
Parrot_charset_binary_init(Interp *interpreter)
@@ -280,8 +258,8 @@ CHARSET *return_set = Parrot_new_charset(interpreter);
CHARSET base_set = {
"binary",
- get_graphemes,
- get_graphemes_inplace,
+ ascii_get_graphemes,
+ ascii_get_graphemes_inplace,
set_graphemes,
to_charset,
copy_to_charset,
@@ -317,7 +295,7 @@ find_not_newline,
find_word_boundary,
string_from_codepoint,
- compute_hash,
+ ascii_compute_hash,
{NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}
};
1.6 +33 -35 parrot/charset/binary.h Index: binary.h ===================================================================
RCS file: /cvs/public/parrot/charset/binary.h,v retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- binary.h 27 Feb 2005 11:03:38 -0000 1.5
+++ binary.h 1 Mar 2005 11:06:26 -0000 1.6
@@ -1,7 +1,7 @@ /* binary.h * Copyright: 2004 The Perl Foundation. All Rights Reserved.
* CVS Info - * $Id: binary.h,v 1.5 2005/02/27 11:03:38 leo Exp $
+ * $Id: binary.h,v 1.6 2005/03/01 11:06:26 leo Exp $
* Overview:
* This is the header for the binary charset functions * Data Structure and Algorithms:
@@ -13,40 +13,38 @@ #if !defined(PARROT_CHARSET_BINARY_H_GUARD)
#define PARROT_CHARSET_BINARY_H_GUARD -static STRING *get_graphemes(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL count);

-static void set_graphemes(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL replace_count, STRING *insert_string);

-static void to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset);

-static STRING *copy_to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset);

-static void to_unicode(Interp *interpreter, STRING *source_string);
-static void compose(Interp *interpreter, STRING *source_string);
-static void decompose(Interp *interpreter, STRING *source_string);
-static void upcase(Interp *interpreter, STRING *source_string);
-static void downcase(Interp *interpreter, STRING *source_string);
-static void titlecase(Interp *interpreter, STRING *source_string);
-static void upcase_first(Interp *interpreter, STRING *source_string);
-static void downcase_first(Interp *interpreter, STRING *source_string);
-static void titlecase_first(Interp *interpreter, STRING *source_string);
-static INTVAL compare(Interp *interpreter, STRING *lhs, STRING *rhs);
-static INTVAL cs_index(Interp *interpreter, const STRING *source_string, const STRING *search_string, UINTVAL offset);

-static INTVAL cs_rindex(Interp *interpreter, const STRING *source_string, const STRING *search_string, UINTVAL offset);

-static UINTVAL validate(Interp *interpreter, STRING *source_string);
-static INTVAL is_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL is_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL is_digit(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_digit(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_digit(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL is_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL is_newline(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_newline(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_newline(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_word_boundary(Interp *interpreter, STRING *source_string, UINTVAL offset);

-CHARSET *Parrot_charset_binary_init(Interp *interpreter);
+static void to_charset(Interp *, STRING *source_string, CHARSET *new_charset);

+static STRING *copy_to_charset(Interp *, STRING *source_string, CHARSET *new_charset);

+static void to_unicode(Interp *, STRING *source_string);
+static void compose(Interp *, STRING *source_string);
+static void decompose(Interp *, STRING *source_string);
+static void upcase(Interp *, STRING *source_string);
+static void downcase(Interp *, STRING *source_string);
+static void titlecase(Interp *, STRING *source_string);
+static void upcase_first(Interp *, STRING *source_string);
+static void downcase_first(Interp *, STRING *source_string);
+static void titlecase_first(Interp *, STRING *source_string);
+static INTVAL compare(Interp *, STRING *lhs, STRING *rhs);
+static INTVAL cs_index(Interp *, const STRING *source_string, const STRING *search_string, UINTVAL offset);

+static INTVAL cs_rindex(Interp *, const STRING *source_string, const STRING *search_string, UINTVAL offset);

+static UINTVAL validate(Interp *, STRING *source_string);
+static INTVAL is_wordchar(Interp *, STRING *source_string, UINTVAL offset);
+static INTVAL find_wordchar(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_not_wordchar(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL is_whitespace(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_whitespace(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_not_whitespace(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL is_digit(Interp *, STRING *source_string, UINTVAL offset);
+static INTVAL find_digit(Interp *, STRING *source_string, UINTVAL offset);
+static INTVAL find_not_digit(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL is_punctuation(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_punctuation(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_not_punctuation(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL is_newline(Interp *, STRING *source_string, UINTVAL offset);
+static INTVAL find_newline(Interp *, STRING *source_string, UINTVAL offset);
+static INTVAL find_not_newline(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_word_boundary(Interp *, STRING *source_string, UINTVAL offset);

+CHARSET *Parrot_charset_binary_init(Interp *);
#endif /* PARROT_CHARSET_BINARY_H_GUARD */
1.9 +5 -64 parrot/charset/iso-8859-1.c Index: iso-8859-1.c ===================================================================
RCS file: /cvs/public/parrot/charset/iso-8859-1.c,v retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- iso-8859-1.c 28 Feb 2005 17:17:51 -0000 1.8
+++ iso-8859-1.c 1 Mar 2005 11:06:26 -0000 1.9
@@ -1,6 +1,6 @@ /*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: iso-8859-1.c,v 1.8 2005/02/28 17:17:51 leo Exp $
+$Id: iso-8859-1.c,v 1.9 2005/03/01 11:06:26 leo Exp $
=head1 NAME @@ -230,50 +230,6 @@ upcase_first(interpreter, source_string);
}
-static INTVAL -compare(Interp *interpreter, STRING *lhs, STRING *rhs)
-{
- INTVAL retval = memcmp(lhs->strstart, rhs->strstart, lhs->strlen);
- if (!retval && lhs->strlen < rhs->strlen) {
- retval = -1;
- }
- if (retval) {
- retval = retval > 0 ? 1 : -1;
- }
- return retval;
-}
-
-static INTVAL -cs_index(Interp *interpreter, const STRING *source_string,
- const STRING *search_string, UINTVAL offset)
-{
- UINTVAL base_size, search_size;
- char *base, *search;
- INTVAL retval;
- if (source_string->charset != search_string->charset) {
- internal_exception(UNIMPLEMENTED, "Cross-charset index not supported");

- }
-
- retval = Parrot_byte_index(interpreter, source_string,
- search_string, offset);
- return retval;
-}
-
-static INTVAL -cs_rindex(Interp *interpreter, const STRING *source_string,
- const STRING *search_string, UINTVAL offset) {
- UINTVAL base_size, search_size;
- char *base, *search;
- INTVAL retval;
- if (source_string->charset != search_string->charset) {
- internal_exception(UNIMPLEMENTED, "Cross-charset index not supported");

- }
-
- retval = Parrot_byte_rindex(interpreter, source_string,
- search_string, offset);
- return retval;
-}
-
static UINTVAL validate(Interp *interpreter, STRING *src)
@@ -401,21 +357,6 @@ return return_string;
}
-static size_t -compute_hash(Interp *interpreter, STRING *source_string)
-{
- size_t hashval = 0;
-
- char *buffptr = (char *)source_string->strstart;
- UINTVAL len = source_string->strlen;
-
- while (len--) {
- hashval += hashval << 5;
- hashval += *buffptr++;
- }
- return hashval;
-}
-
CHARSET *
Parrot_charset_iso_8859_1_init(Interp *interpreter)
{
@@ -438,9 +379,9 @@ upcase_first,
downcase_first,
titlecase_first,
- compare,
- cs_index,
- cs_rindex,
+ ascii_compare,
+ ascii_cs_index,
+ ascii_cs_rindex,
validate,
is_wordchar,
find_wordchar,
@@ -459,7 +400,7 @@ ascii_find_not_newline,
find_word_boundary,
string_from_codepoint,
- compute_hash,
+ ascii_compute_hash,
{NULL, 0, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}
1.7 +27 -31 parrot/charset/iso-8859-1.h Index: iso-8859-1.h ===================================================================
RCS file: /cvs/public/parrot/charset/iso-8859-1.h,v retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- iso-8859-1.h 28 Feb 2005 17:17:51 -0000 1.6
+++ iso-8859-1.h 1 Mar 2005 11:06:26 -0000 1.7
@@ -1,7 +1,7 @@ /* iso_8859_1.h * Copyright: 2004 The Perl Foundation. All Rights Reserved.
* CVS Info - * $Id: iso-8859-1.h,v 1.6 2005/02/28 17:17:51 leo Exp $
+ * $Id: iso-8859-1.h,v 1.7 2005/03/01 11:06:26 leo Exp $
* Overview:
* This is the header for the iso_8859-1 charset functions * Data Structure and Algorithms:
@@ -13,37 +13,33 @@ #if !defined(PARROT_CHARSET_ISO_8859_1_H_GUARD)
#define PARROT_CHARSET_ISO_8859_1_H_GUARD -static void set_graphemes(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL replace_count, STRING *insert_string);

-static void to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset);

-static STRING *copy_to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset);

-static void to_unicode(Interp *interpreter, STRING *source_string);
-static void compose(Interp *interpreter, STRING *source_string);
-static void decompose(Interp *interpreter, STRING *source_string);
-static void upcase(Interp *interpreter, STRING *source_string);
-static void downcase(Interp *interpreter, STRING *source_string);
-static void titlecase(Interp *interpreter, STRING *source_string);
-static void upcase_first(Interp *interpreter, STRING *source_string);
-static void downcase_first(Interp *interpreter, STRING *source_string);
-static void titlecase_first(Interp *interpreter, STRING *source_string);
-static INTVAL compare(Interp *interpreter, STRING *lhs, STRING *rhs);
-static INTVAL cs_index(Interp *interpreter, const STRING *source_string, const STRING *search_string, UINTVAL offset);

-static INTVAL cs_rindex(Interp *interpreter, const STRING *source_string, const STRING *search_string, UINTVAL offset);

-static UINTVAL validate(Interp *interpreter, STRING *source_string);
-static INTVAL is_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL is_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL is_digit(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_digit(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_digit(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL is_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static size_t compute_hash(Interp *interpreter, STRING *source_string);
-CHARSET *Parrot_charset_iso_8859_1_init(Interp *interpreter);
+static void set_graphemes(Interp *, STRING *source_string, UINTVAL offset, UINTVAL replace_count, STRING *insert_string);

+static void to_charset(Interp *, STRING *source_string, CHARSET *new_charset);

+static STRING *copy_to_charset(Interp *, STRING *source_string, CHARSET *new_charset);

+static void to_unicode(Interp *, STRING *source_string);
+static void compose(Interp *, STRING *source_string);
+static void decompose(Interp *, STRING *source_string);
+static void upcase(Interp *, STRING *source_string);
+static void downcase(Interp *, STRING *source_string);
+static void titlecase(Interp *, STRING *source_string);
+static void upcase_first(Interp *, STRING *source_string);
+static void downcase_first(Interp *, STRING *source_string);
+static void titlecase_first(Interp *, STRING *source_string);
+static UINTVAL validate(Interp *, STRING *source_string);
+static INTVAL is_wordchar(Interp *, STRING *source_string, UINTVAL offset);
+static INTVAL find_wordchar(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_not_wordchar(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL is_whitespace(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_whitespace(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_not_whitespace(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL is_digit(Interp *, STRING *source_string, UINTVAL offset);
+static INTVAL find_digit(Interp *, STRING *source_string, UINTVAL offset);
+static INTVAL find_not_digit(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL is_punctuation(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_punctuation(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_not_punctuation(Interp *, STRING *source_string, UINTVAL offset);

+CHARSET *Parrot_charset_iso_8859_1_init(Interp *);
#endif /* PARROT_CHARSET_ISO_8859_1_H_GUARD */
/*