PERL CVS PARROT 29 CVS COMMIT PARROT SRC CHARSET C UTILS C
Date: 27 Feb 2005 11:03:44 -0000

Subject: cvs commit: parrot/src charset.c utils.c
From: leo@no-spam (Leopold Toetsch)

cvsuser 05/02/27 03:03:44

Modified: charset ascii.c ascii.h binary.c binary.h iso-8859-1.c iso-8859-1.h
          include/parrot charset.h misc.h
          src charset.c utils.c
Log:
the big string patch 2 - charset cleanup
* pdd07 changes
* make rindex string args const

Revision Changes Path
1.6 +159 -71 parrot/charset/ascii.c

Index: ascii.c
===================================================================
RCS file: /cvs/public/parrot/charset/ascii.c,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- ascii.c 27 Feb 2005 09:58:40 -0000 1.5
+++ ascii.c 27 Feb 2005 11:03:38 -0000 1.6
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: ascii.c,v 1.5 2005/02/27 09:58:40 leo Exp $
+$Id: ascii.c,v 1.6 2005/02/27 11:03:38 leo Exp $
=head1 NAME
@@ -40,50 +40,78 @@
};
-static STRING *get_graphemes(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL count) {

+static STRING *
+get_graphemes(Interp *interpreter, STRING *source_string,
+ UINTVAL offset, UINTVAL count)
+{
return ENCODING_GET_BYTES(interpreter, source_string, offset, count);
}
-static void set_graphemes(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL replace_count, STRING *insert_string) {

- ENCODING_SET_BYTES(interpreter, source_string, offset, replace_count, insert_string);

+static void +set_graphemes(Interp *interpreter, STRING *source_string,
+ UINTVAL offset, UINTVAL replace_count, STRING *insert_string)
+{
+ ENCODING_SET_BYTES(interpreter, source_string, offset,
+ replace_count, insert_string);
}
-static STRING *get_graphemes_inplace(Interp *interpreter, STRING *source_string, STRING *dest_string, UINTVAL offset, UINTVAL count) {

- return ENCODING_GET_BYTES_INPLACE(interpreter, source_string, offset, count, dest_string);

-}
-
-static void to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset) {

+static STRING *
+get_graphemes_inplace(Interp *interpreter, STRING *source_string,
+ STRING *dest_string, UINTVAL offset, UINTVAL count)
+{
+ return ENCODING_GET_BYTES_INPLACE(interpreter, source_string,
+ offset, count, dest_string);
+}
+
+static void +to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset)
+{
internal_exception(UNIMPLEMENTED, "to_charset for ascii not implemented");

}
-static STRING *copy_to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset) {

+static STRING *
+copy_to_charset(Interp *interpreter, STRING *source_string,
+ CHARSET *new_charset)
+{
STRING *return_string = NULL;
return return_string;
}
-static void to_unicode(Interp *interpreter, STRING *source_string) {
+static void +to_unicode(Interp *interpreter, STRING *source_string)
+{
internal_exception(UNIMPLEMENTED, "to_unicode for ascii not implemented");

}
-static void from_charset(Interp *interpreter, STRING *source_string) {
+static void +from_charset(Interp *interpreter, STRING *source_string)
+{
internal_exception(UNIMPLEMENTED, "Can't do this yet");
}
-static void from_unicode(Interp *interpreter, STRING *source_string) {
+static void +from_unicode(Interp *interpreter, STRING *source_string)
+{
internal_exception(UNIMPLEMENTED, "Can't do this yet");
}
/* A noop. can't compose ascii */
-static void compose(Interp *interpreter, STRING *source_string) {
+static void +compose(Interp *interpreter, STRING *source_string)
+{
}
/* A noop. can't decompose ascii */
-static void decompose(Interp *interpreter, STRING *source_string) {
+static void +decompose(Interp *interpreter, STRING *source_string)
+{
}
-static void upcase(Interp *interpreter, STRING *source_string) {
+static void +upcase(Interp *interpreter, STRING *source_string)
+{
char *buffer;
UINTVAL offset = 0;
@@ -98,7 +126,9 @@
}
}
-static void downcase(Interp *interpreter, STRING *source_string) {
+static void +downcase(Interp *interpreter, STRING *source_string)
+{
UINTVAL offset = 0;
char *buffer;
if (!source_string->strlen) {
@@ -111,7 +141,9 @@
}
}
-static void titlecase(Interp *interpreter, STRING *source_string) {
+static void +titlecase(Interp *interpreter, STRING *source_string)
+{
char *buffer;
UINTVAL offset = 0;
if (!source_string->strlen) {
@@ -125,7 +157,9 @@
}
}
-static void upcase_first(Interp *interpreter, STRING *source_string) {
+static void +upcase_first(Interp *interpreter, STRING *source_string)
+{
char *buffer;
if (!source_string->strlen) {
return;
@@ -135,7 +169,9 @@
buffer[0] = toupper(buffer[0]);
}
-static void downcase_first(Interp *interpreter, STRING *source_string) {
+static void +downcase_first(Interp *interpreter, STRING *source_string)
+{
char *buffer;
if (!source_string->strlen) {
return;
@@ -145,7 +181,9 @@
buffer[0] = toupper(buffer[0]);
}
-static void titlecase_first(Interp *interpreter, STRING *source_string) {
+static void +titlecase_first(Interp *interpreter, STRING *source_string)
+{
char *buffer;
if (!source_string->strlen) {
return;
@@ -155,109 +193,159 @@
buffer[0] = toupper(buffer[0]);
}
-static INTVAL compare(Interp *interpreter, STRING *lhs, STRING *rhs) {
- return 0;
-}
-
-static INTVAL cs_index(Interp *interpreter, const STRING *source_string, const STRING *search_string, UINTVAL offset) {

- return -1;
-}
-
-static INTVAL cs_rindex(Interp *interpreter, STRING *source_string, STRING *search_string, UINTVAL offset) {

- return -1;
+static INTVAL +compare(Interp *interpreter, STRING *lhs, STRING *rhs)
+{
+ return 0;
+}
+
+static INTVAL +cs_index(Interp *interpreter, const STRING *source_string,
+ const STRING *search_string, UINTVAL offset)
+{
+ return -1;
+}
+
+static INTVAL +cs_rindex(Interp *interpreter, const STRING *source_string,
+ const STRING *search_string, UINTVAL offset)
+{
+ return -1;
}
/* Binary's always valid */
-static UINTVAL validate(Interp *interpreter, STRING *source_string) {
+static UINTVAL +validate(Interp *interpreter, STRING *source_string)
+{
return 1;
}
/* No word chars in binary data */
-static INTVAL is_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return 0;
+static INTVAL +is_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return 0;
}
-static INTVAL find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return -1;
+static INTVAL +find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return -1;
}
-static INTVAL find_not_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return offset;
+static INTVAL +find_not_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)

+{
+ return offset;
}
-static INTVAL is_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return 0;
+static INTVAL +is_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return 0;
}
-static INTVAL find_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return -1;
+static INTVAL +find_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return -1;
}
-static INTVAL find_not_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return offset;
+static INTVAL +find_not_whitespace(Interp *interpreter, STRING *source_string,
+ UINTVAL offset)
+{
+ return offset;
}
-static INTVAL is_digit(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return 0;
+static INTVAL +is_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return 0;
}
-static INTVAL find_digit(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return -1;
+static INTVAL +find_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return -1;
}
-static INTVAL find_not_digit(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return offset;
+static INTVAL +find_not_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return offset;
}
-static INTVAL is_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return 0;
+static INTVAL +is_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return 0;
}
-static INTVAL find_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return -1;
+static INTVAL +find_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return -1;
}
-static INTVAL find_not_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return offset;
+static INTVAL +find_not_punctuation(Interp *interpreter, STRING *source_string,
+ UINTVAL offset)
+{
+ return offset;
}
-static INTVAL is_newline(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return 0;
+static INTVAL +is_newline(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return 0;
}
-static INTVAL find_newline(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return -1;
+static INTVAL +find_newline(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return -1;
}
-static INTVAL find_not_newline(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return offset;
+static INTVAL +find_not_newline(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return offset;
}
-static INTVAL find_word_boundary(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return -1;
+static INTVAL +find_word_boundary(Interp *interpreter, STRING *source_string, UINTVAL offset)

+{
+ return -1;
}
-static STRING *string_from_codepoint(Interp *interpreter, UINTVAL codepoint) {

+static STRING *
+string_from_codepoint(Interp *interpreter, UINTVAL codepoint)
+{
STRING *return_string = NULL;
char real_codepoint = codepoint;
return_string = string_make(interpreter, &real_codepoint, 1, "ascii", 0);

return return_string;
}
-static size_t compute_hash(Interp *interpreter, STRING *source_string) {
+static size_t +compute_hash(Interp *interpreter, STRING *source_string)
+{
size_t hashval;
char *buffptr = (char *)source_string->strstart;
- UINTVAL len = source_string->strlen; + UINTVAL len = source_string->strlen;
- while (len--) { + while (len--) {
hashval += hashval << 5;
hashval += *buffptr++;
}
return hashval;
}
-CHARSET *Parrot_charset_ascii_init(Interp *interpreter) {
+CHARSET *
+Parrot_charset_ascii_init(Interp *interpreter)
+{
CHARSET *return_set = Parrot_new_charset(interpreter);
CHARSET base_set = {
"ascii",
@@ -299,7 +387,8 @@
find_word_boundary,
string_from_codepoint,
compute_hash,
- {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}

+ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}
};
/* Snag the global. This is... bad. Should be properly fixed at some
@@ -311,7 +400,6 @@
memcpy(return_set, &base_set, sizeof(CHARSET));
Parrot_register_charset(interpreter, "ascii", return_set);
return return_set;
-
}
/*
1.6 +37 -37 parrot/charset/ascii.h

Index: ascii.h
===================================================================
RCS file: /cvs/public/parrot/charset/ascii.h,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- ascii.h 27 Feb 2005 09:58:40 -0000 1.5
+++ ascii.h 27 Feb 2005 11:03:38 -0000 1.6
@@ -1,7 +1,7 @@
/* ascii.h
* Copyright: 2004 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: ascii.h,v 1.5 2005/02/27 09:58:40 leo Exp $
+ * $Id: ascii.h,v 1.6 2005/02/27 11:03:38 leo Exp $
* Overview:
* This is the header for the ascii charset functions
* Data Structure and Algorithms:
@@ -13,42 +13,42 @@
#if !defined(PARROT_CHARSET_ASCII_H_GUARD)
#define PARROT_CHARSET_ASCII_H_GUARD
-static STRING *get_graphemes(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL count);

-static STRING *get_graphemes_inplace(Interp *interpreter, STRING *source_string, STRING *dest_string, UINTVAL offset, UINTVAL count);

-static void set_graphemes(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL replace_count, STRING *insert_string);

-static void to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset);

-static STRING *copy_to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset);

-static void to_unicode(Interp *interpreter, STRING *source_string);
-static void compose(Interp *interpreter, STRING *source_string);
-static void decompose(Interp *interpreter, STRING *source_string);
-static void upcase(Interp *interpreter, STRING *source_string);
-static void downcase(Interp *interpreter, STRING *source_string);
-static void titlecase(Interp *interpreter, STRING *source_string);
-static void upcase_first(Interp *interpreter, STRING *source_string);
-static void downcase_first(Interp *interpreter, STRING *source_string);
-static void titlecase_first(Interp *interpreter, STRING *source_string);
-static INTVAL compare(Interp *interpreter, STRING *lhs, STRING *rhs);
-static INTVAL cs_index(Interp *interpreter, const STRING *source_string, const STRING *search_string, UINTVAL offset);

-static INTVAL cs_rindex(Interp *interpreter, STRING *source_string, STRING *search_string, UINTVAL offset);

-static UINTVAL validate(Interp *interpreter, STRING *source_string);
-static INTVAL is_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL is_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL is_digit(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_digit(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_digit(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL is_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL is_newline(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_newline(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_not_newline(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static INTVAL find_word_boundary(Interp *interpreter, STRING *source_string, UINTVAL offset);

-static size_t compute_hash(Interp *interpreter, STRING *source_string);
-CHARSET *Parrot_charset_ascii_init(Interp *interpreter);
+static STRING *get_graphemes(Interp *, STRING *source_string, UINTVAL offset, UINTVAL count);

+static STRING *get_graphemes_inplace(Interp *, STRING *source_string, STRING *dest_string, UINTVAL offset, UINTVAL count);

+static void set_graphemes(Interp *, STRING *source_string, UINTVAL offset, UINTVAL replace_count, STRING *insert_string);

+static void to_charset(Interp *, STRING *source_string, CHARSET *new_charset);

+static STRING *copy_to_charset(Interp *, STRING *source_string, CHARSET *new_charset);

+static void to_unicode(Interp *, STRING *source_string);
+static void compose(Interp *, STRING *source_string);
+static void decompose(Interp *, STRING *source_string);
+static void upcase(Interp *, STRING *source_string);
+static void downcase(Interp *, STRING *source_string);
+static void titlecase(Interp *, STRING *source_string);
+static void upcase_first(Interp *, STRING *source_string);
+static void downcase_first(Interp *, STRING *source_string);
+static void titlecase_first(Interp *, STRING *source_string);
+static INTVAL compare(Interp *, STRING *lhs, STRING *rhs);
+static INTVAL cs_index(Interp *, const STRING *source_string, const STRING *search_string, UINTVAL offset);

+static INTVAL cs_rindex(Interp *, const STRING *source_string, const STRING *search_string, UINTVAL offset);

+static UINTVAL validate(Interp *, STRING *source_string);
+static INTVAL is_wordchar(Interp *, STRING *source_string, UINTVAL offset);
+static INTVAL find_wordchar(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_not_wordchar(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL is_whitespace(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_whitespace(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_not_whitespace(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL is_digit(Interp *, STRING *source_string, UINTVAL offset);
+static INTVAL find_digit(Interp *, STRING *source_string, UINTVAL offset);
+static INTVAL find_not_digit(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL is_punctuation(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_punctuation(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_not_punctuation(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL is_newline(Interp *, STRING *source_string, UINTVAL offset);
+static INTVAL find_newline(Interp *, STRING *source_string, UINTVAL offset);
+static INTVAL find_not_newline(Interp *, STRING *source_string, UINTVAL offset);

+static INTVAL find_word_boundary(Interp *, STRING *source_string, UINTVAL offset);

+static size_t compute_hash(Interp *, STRING *source_string);
+CHARSET *Parrot_charset_ascii_init(Interp *);
#endif /* PARROT_CHARSET_ASCII_H_GUARD */
1.7 +157 -70 parrot/charset/binary.c

Index: binary.c
===================================================================
RCS file: /cvs/public/parrot/charset/binary.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- binary.c 27 Feb 2005 09:58:40 -0000 1.6
+++ binary.c 27 Feb 2005 11:03:38 -0000 1.7
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: binary.c,v 1.6 2005/02/27 09:58:40 leo Exp $
+$Id: binary.c,v 1.7 2005/02/27 11:03:38 leo Exp $
=head1 NAME
@@ -20,169 +20,253 @@
/* The encoding we prefer, given a choice */
static ENCODING *preferred_encoding;
-static STRING *get_graphemes(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL count) {

+static STRING *
+get_graphemes(Interp *interpreter, STRING *source_string,
+ UINTVAL offset, UINTVAL count)
+{
return ENCODING_GET_BYTES(interpreter, source_string, offset, count);
}
-static STRING *get_graphemes_inplace(Interp *interpreter, STRING *source_string, STRING *dest_string, UINTVAL offset, UINTVAL count) {

- return ENCODING_GET_BYTES_INPLACE(interpreter, source_string, offset, count, dest_string);

-}
-
-static void set_graphemes(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL replace_count, STRING *insert_string) {

- ENCODING_SET_BYTES(interpreter, source_string, offset, replace_count, insert_string);

-}
-
-static void to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset) {

+static STRING *
+get_graphemes_inplace(Interp *interpreter, STRING *source_string,
+ STRING *dest_string, UINTVAL offset, UINTVAL count)
+{
+ return ENCODING_GET_BYTES_INPLACE(interpreter, source_string,
+ offset, count, dest_string);
+}
+
+static void +set_graphemes(Interp *interpreter, STRING *source_string,
+ UINTVAL offset, UINTVAL replace_count, STRING *insert_string)
+{
+ ENCODING_SET_BYTES(interpreter, source_string, offset,
+ replace_count, insert_string);
+}
+
+static void +to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset)
+{
internal_exception(UNIMPLEMENTED, "to_charset for binary not implemented");

}
-static STRING *copy_to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset) {

+static STRING *
+copy_to_charset(Interp *interpreter, STRING *source_string,
+ CHARSET *new_charset)
+{
STRING *return_string = NULL;
- internal_exception(UNIMPLEMENTED, "copy_to_charset for binary not implemented");

-
+ internal_exception(UNIMPLEMENTED,
+ "copy_to_charset for binary not implemented");
return return_string;
}
-static void to_unicode(Interp *interpreter, STRING *source_string) {
+static void +to_unicode(Interp *interpreter, STRING *source_string)
+{
internal_exception(UNIMPLEMENTED, "to_unicode for binary not implemented");

}
-static void from_charset(Interp *interpreter, STRING *source_string) {
+static void +from_charset(Interp *interpreter, STRING *source_string)
+{
internal_exception(UNIMPLEMENTED, "Can't do this yet");
}
-static void from_unicode(Interp *interpreter, STRING *source_string) {
+static void +from_unicode(Interp *interpreter, STRING *source_string)
+{
internal_exception(UNIMPLEMENTED, "Can't do this yet");
}
/* A noop. can't compose binary */
-static void compose(Interp *interpreter, STRING *source_string) {
+static void +compose(Interp *interpreter, STRING *source_string)
+{
}
/* A noop. can't decompose binary */
-static void decompose(Interp *interpreter, STRING *source_string) {
+static void +decompose(Interp *interpreter, STRING *source_string)
+{
}
-static void upcase(Interp *interpreter, STRING *source_string) {
+static void +upcase(Interp *interpreter, STRING *source_string)
+{
internal_exception(INVALID_CHARTYPE, "Can't upcase binary data");
}
-static void downcase(Interp *interpreter, STRING *source_string) {
+static void +downcase(Interp *interpreter, STRING *source_string)
+{
internal_exception(INVALID_CHARTYPE, "Can't downcase binary data");
}
-static void titlecase(Interp *interpreter, STRING *source_string) {
+static void +titlecase(Interp *interpreter, STRING *source_string)
+{
internal_exception(INVALID_CHARTYPE, "Can't titlecase binary data");
}
-static void upcase_first(Interp *interpreter, STRING *source_string) {
+static void +upcase_first(Interp *interpreter, STRING *source_string)
+{
internal_exception(INVALID_CHARTYPE, "Can't upcase binary data");
}
-static void downcase_first(Interp *interpreter, STRING *source_string) {
+static void +downcase_first(Interp *interpreter, STRING *source_string)
+{
internal_exception(INVALID_CHARTYPE, "Can't downcase binary data");
}
-static void titlecase_first(Interp *interpreter, STRING *source_string) {
+static void +titlecase_first(Interp *interpreter, STRING *source_string)
+{
internal_exception(INVALID_CHARTYPE, "Can't titlecase binary data");
}
-static INTVAL compare(Interp *interpreter, STRING *lhs, STRING *rhs) {
+static INTVAL +compare(Interp *interpreter, STRING *lhs, STRING *rhs)
+{
return 0;
}
-static INTVAL cs_index(Interp *interpreter, const STRING *source_string, const STRING *search_string, UINTVAL offset) {

- return -1;
+static INTVAL +cs_index(Interp *interpreter, const STRING *source_string,
+ const STRING *search_string, UINTVAL offset)
+{
+ return -1;
}
-static INTVAL cs_rindex(Interp *interpreter, STRING *source_string, STRING *search_string, UINTVAL offset) {

- return -1;
+static INTVAL +cs_rindex(Interp *interpreter, const STRING *source_string,
+ const STRING *search_string, UINTVAL offset)
+{
+ return -1;
}
/* Binary's always valid */
-static UINTVAL validate(Interp *interpreter, STRING *source_string) {
+static UINTVAL +validate(Interp *interpreter, STRING *source_string)
+{
return 1;
}
/* No word chars in binary data */
-static INTVAL is_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return 0;
+static INTVAL +is_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return 0;
}
-static INTVAL find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return -1;
+static INTVAL +find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return -1;
}
-static INTVAL find_not_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return offset;
+static INTVAL +find_not_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)

+{
+ return offset;
}
-static INTVAL is_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return 0;
+static INTVAL +is_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return 0;
}
-static INTVAL find_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return -1;
+static INTVAL +find_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return -1;
}
-static INTVAL find_not_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return offset;
+static INTVAL +find_not_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+ return offset;
}
-static INTVAL is_digit(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return 0;
+static INTVAL +is_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return 0;
}
-static INTVAL find_digit(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return -1;
+static INTVAL +find_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return -1;
}
-static INTVAL find_not_digit(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return offset;
+static INTVAL +find_not_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return offset;
}
-static INTVAL is_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return 0;
+static INTVAL +is_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return 0;
}
-static INTVAL find_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return -1;
+static INTVAL +find_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return -1;
}
-static INTVAL find_not_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return offset;
+static INTVAL +find_not_punctuation(Interp *interpreter, STRING *source_string,
+ UINTVAL offset)
+{
+ return offset;
}
-static INTVAL is_newline(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return 0;
+static INTVAL +is_newline(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return 0;
}
-static INTVAL find_newline(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return -1;
+static INTVAL +find_newline(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return -1;
}
-static INTVAL find_not_newline(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return offset;
+static INTVAL +find_not_newline(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
+ return offset;
}
-static INTVAL find_word_boundary(Interp *interpreter, STRING *source_string, UINTVAL offset) {

- return -1;
+static INTVAL +find_word_boundary(Interp *interpreter, STRING *source_string, UINTVAL offset)

+{
+ return -1;
}
-static STRING *string_from_codepoint(Interp *interpreter, UINTVAL codepoint) {

+static STRING *
+string_from_codepoint(Interp *interpreter, UINTVAL codepoint)
+{
STRING *return_string = NULL;
char real_codepoint = codepoint;
return_string = string_make(interpreter, &real_codepoint, 1, "binary", 0);

return return_string;
}
-static size_t compute_hash(Interp *interpreter, STRING *source_string) {
+static size_t +compute_hash(Interp *interpreter, STRING *source_string)
+{
size_t hashval;
char *buffptr = (char *)source_string->strstart;
- UINTVAL len = source_string->strlen; + UINTVAL len = source_string->strlen;
- while (len--) { + while (len--) {
hashval += hashval << 5;
hashval += *buffptr++;
}
@@ -190,7 +274,9 @@
}
-CHARSET *Parrot_charset_binary_init(Interp *interpreter) {
+CHARSET *
+Parrot_charset_binary_init(Interp *interpreter)
+{
CHARSET *return_set = Parrot_new_charset(interpreter);
CHARSET base_set = {
"binary",
@@ -232,7 +318,8 @@
find_word_boundary,
string_from_codepoint,
compute_hash,
- {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}

+ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}
};
/* Snag the global. This is... bad. Should be properly fixed at some
@@ -240,7 +327,7 @@
preferred_encoding = Parrot_fixed_8_encoding_ptr;
/* preferred_encoding = Parrot_load_encoding(interpreter, "fixed_8"); */
- +
memcpy(return_set, &base_set, sizeof(CHARSET));
Parrot_register_charset(interpreter, "binary", return_set);
1.5 +2 -2 parrot/charset/binary.h

Index: binary.h
===================================================================
RCS file: /cvs/public/parrot/charset/binary.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- binary.h 27 Feb 2005 09:58:40 -0000 1.4
+++ binary.h 27 Feb 2005 11:03:38 -0000 1.5
@@ -1,7 +1,7 @@
/* binary.h
* Copyright: 2004 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: binary.h,v 1.4 2005/02/27 09:58:40 leo Exp $
+ * $Id: binary.h,v 1.5 2005/02/27 11:03:38 leo Exp $
* Overview:
* This is the header for the binary charset functions
* Data Structure and Algorithms:
@@ -28,7 +28,7 @@
static void titlecase_first(Interp *interpreter, STRING *source_string);
static INTVAL compare(Interp *interpreter, STRING *lhs, STRING *rhs);
static INTVAL cs_index(Interp *interpreter, const STRING *source_string, const STRING *search_string, UINTVAL offset);

-static INTVAL cs_rindex(Interp *interpreter, STRING *source_string, STRING *search_string, UINTVAL offset);

+static INTVAL cs_rindex(Interp *interpreter, const STRING *source_string, const STRING *search_string, UINTVAL offset);

static UINTVAL validate(Interp *interpreter, STRING *source_string);
static INTVAL is_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset);

static INTVAL find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset);

1.4 +216 -117 parrot/charset/iso-8859-1.c

Index: iso-8859-1.c
===================================================================
RCS file: /cvs/public/parrot/charset/iso-8859-1.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- iso-8859-1.c 27 Feb 2005 09:58:40 -0000 1.3
+++ iso-8859-1.c 27 Feb 2005 11:03:38 -0000 1.4
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: iso-8859-1.c,v 1.3 2005/02/27 09:58:40 leo Exp $
+$Id: iso-8859-1.c,v 1.4 2005/02/27 11:03:38 leo Exp $
=head1 NAME
@@ -25,7 +25,7 @@
#define PUNCTUATION 3
#define DIGIT 4
-static char typetable[256] = {
+static unsigned char typetable[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, /* 0-15 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16-31 */
1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 32-47 */
@@ -44,12 +44,15 @@
2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 240-255 */
};
-static INTVAL find_thing(Interp *interpreter, STRING *string, UINTVAL start, UINTVAL type) {

+static INTVAL +find_thing(Interp *interpreter, STRING *string, UINTVAL start, UINTVAL type)
+{
INTVAL retval = -1;
UINTVAL offset = start;
INTVAL found = 0;
for (; offset < string->strlen; offset++) {
- if (typetable[ENCODING_GET_CODEPOINT(interpreter, string, offset)] == type) {

+ if (typetable[ENCODING_GET_CODEPOINT(interpreter, string, offset)]
+ == type) {
found = 1;
break;
}
@@ -60,12 +63,16 @@
return retval;
}
-static INTVAL find_not_thing(Interp *interpreter, STRING *string, UINTVAL start, UINTVAL type) {

+static INTVAL +find_not_thing(Interp *interpreter, STRING *string, UINTVAL start,
+ UINTVAL type)
+{
INTVAL retval = -1;
UINTVAL offset = start;
INTVAL found = 0;
for (; offset < string->strlen; offset++) {
- if (typetable[ENCODING_GET_CODEPOINT(interpreter, string, offset)] != type) {

+ if (typetable[ENCODING_GET_CODEPOINT(interpreter, string, offset)]
+ != type) {
found = 1;
break;
}
@@ -76,56 +83,86 @@
return retval;
}
-static STRING *get_graphemes(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL count) {

+static STRING *
+get_graphemes(Interp *interpreter, STRING *source_string,
+ UINTVAL offset, UINTVAL count)
+{
return ENCODING_GET_BYTES(interpreter, source_string, offset, count);
}
-static STRING *get_graphemes_inplace(Interp *interpreter, STRING *source_string, STRING *dest_string, UINTVAL offset, UINTVAL count) {

- return ENCODING_GET_BYTES_INPLACE(interpreter, source_string, offset, count, dest_string);

-}
-
-static void set_graphemes(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL replace_count, STRING *insert_string) {

- ENCODING_SET_BYTES(interpreter, source_string, offset, replace_count, insert_string);

-
-}
-
-static void from_charset(Interp *interpreter, STRING *source_string) {
+static STRING *
+get_graphemes_inplace(Interp *interpreter, STRING *source_string,
+ STRING *dest_string, UINTVAL offset, UINTVAL count)
+{
+ return ENCODING_GET_BYTES_INPLACE(interpreter, source_string,
+ offset, count, dest_string);
+}
+
+static void
+set_graphemes(Interp *interpreter, STRING *source_string,
+ UINTVAL offset, UINTVAL replace_count, STRING *insert_string)
+{
+ ENCODING_SET_BYTES(interpreter, source_string, offset,
+ replace_count, insert_string);
+}
+
+static void
+from_charset(Interp *interpreter, STRING *source_string)
+{
internal_exception(UNIMPLEMENTED, "Can't do this yet");
}
-static void from_unicode(Interp *interpreter, STRING *source_string) {
+static void
+from_unicode(Interp *interpreter, STRING *source_string)
+{
internal_exception(UNIMPLEMENTED, "Can't do this yet");
}
-static void to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset) {

+static void
+to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset)
+{
void *conversion_func;
- if ((conversion_func = Parrot_find_charset_converter(interpreter, source_string->charset, new_charset))) {

- } else {
+ if ((conversion_func = Parrot_find_charset_converter(interpreter,
+ source_string->charset, new_charset))) {
+ }
+ else {
to_unicode(interpreter, source_string);
new_charset->from_charset(interpreter, source_string);
}
}
-static STRING *copy_to_charset(Interp *interpreter, STRING *source_string, CHARSET *new_charset) {

+static STRING *
+copy_to_charset(Interp *interpreter, STRING *source_string,
+ CHARSET *new_charset)
+{
STRING *return_string = NULL;
return return_string;
}
-static void to_unicode(Interp *interpreter, STRING *source_string) {
- internal_exception(UNIMPLEMENTED, "to_unicode for iso-8859-1 not implemented");

+static void
+to_unicode(Interp *interpreter, STRING *source_string)
+{
+ internal_exception(UNIMPLEMENTED,
+ "to_unicode for iso-8859-1 not implemented");
}
/* A noop. can't compose iso-8859-1 */
-static void compose(Interp *interpreter, STRING *source_string) {
+static void
+compose(Interp *interpreter, STRING *source_string)
+{
}
/* A noop. can't decompose iso-8859-1 */
-static void decompose(Interp *interpreter, STRING *source_string) {
+static void
+decompose(Interp *interpreter, STRING *source_string)
+{
}
-static void upcase(Interp *interpreter, STRING *source_string) {
+static void
+upcase(Interp *interpreter, STRING *source_string)
+{
char *buffer;
UINTVAL offset = 0;
@@ -140,7 +177,9 @@
}
}
-static void downcase(Interp *interpreter, STRING *source_string) {
+static void
+downcase(Interp *interpreter, STRING *source_string)
+{
UINTVAL offset = 0;
char *buffer;
if (!source_string->strlen) {
@@ -153,7 +192,9 @@
}
}
-static void titlecase(Interp *interpreter, STRING *source_string) {
+static void
+titlecase(Interp *interpreter, STRING *source_string)
+{
char *buffer;
UINTVAL offset = 0;
if (!source_string->strlen) {
@@ -167,7 +208,9 @@
}
}
-static void upcase_first(Interp *interpreter, STRING *source_string) {
+static void
+upcase_first(Interp *interpreter, STRING *source_string)
+{
char *buffer;
if (!source_string->strlen) {
return;
@@ -177,7 +220,9 @@
buffer[0] = toupper(buffer[0]);
}
-static void downcase_first(Interp *interpreter, STRING *source_string) {
+static void
+downcase_first(Interp *interpreter, STRING *source_string)
+{
char *buffer;
if (!source_string->strlen) {
return;
@@ -187,7 +232,9 @@
buffer[0] = toupper(buffer[0]);
}
-static void titlecase_first(Interp *interpreter, STRING *source_string) {
+static void
+titlecase_first(Interp *interpreter, STRING *source_string)
+{
char *buffer;
if (!source_string->strlen) {
return;
@@ -197,7 +244,9 @@
buffer[0] = toupper(buffer[0]);
}
-static INTVAL compare(Interp *interpreter, STRING *lhs, STRING *rhs) {
+static INTVAL
+compare(Interp *interpreter, STRING *lhs, STRING *rhs)
+{
INTVAL retval = memcmp(lhs->strstart, rhs->strstart, lhs->strlen);
if (!retval && lhs->strlen < rhs->strlen) {
retval = -1;
@@ -208,187 +257,237 @@
return retval;
}
-static INTVAL cs_index(Interp *interpreter, const STRING *source_string, const STRING *search_string, UINTVAL offset) {

+static INTVAL
+cs_index(Interp *interpreter, const STRING *source_string,
+ const STRING *search_string, UINTVAL offset)
+{
UINTVAL base_size, search_size;
char *base, *search;
INTVAL retval;
if (source_string->charset != search_string->charset) {
internal_exception(UNIMPLEMENTED, "Cross-charset index not supported");

}
-
- retval = Parrot_byte_index(interpreter, source_string, search_string, offset);

+
+ retval = Parrot_byte_index(interpreter, source_string,
+ search_string, offset);
return retval;
}
-static INTVAL cs_rindex(Interp *interpreter, STRING *source_string, STRING *search_string, UINTVAL offset) {

+static INTVAL
+cs_rindex(Interp *interpreter, const STRING *source_string,
+ const STRING *search_string, UINTVAL offset) {
UINTVAL base_size, search_size;
char *base, *search;
INTVAL retval;
if (source_string->charset != search_string->charset) {
internal_exception(UNIMPLEMENTED, "Cross-charset index not supported");

}
-
- retval = Parrot_byte_rindex(interpreter, source_string, search_string, offset);

+
+ retval = Parrot_byte_rindex(interpreter, source_string,
+ search_string, offset);
return retval;
}
/* Binary's always valid */
-static UINTVAL validate(Interp *interpreter, STRING *source_string) {
+static UINTVAL
+validate(Interp *interpreter, STRING *source_string)
+{
return 1;
}
/* No word chars in binary data */
-static INTVAL is_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+is_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
UINTVAL codepoint;
codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
return (typetable[codepoint] == WORDCHAR);
}
-static INTVAL find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
return find_thing(interpreter, source_string, offset, WORDCHAR);
}
-static INTVAL find_not_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+find_not_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset)

+{
return find_not_thing(interpreter, source_string, offset, WORDCHAR);
}
-static INTVAL is_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+is_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
UINTVAL codepoint;
codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
return (typetable[codepoint] == WHITESPACE);
}
-static INTVAL find_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+find_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
return find_thing(interpreter, source_string, offset, WHITESPACE);
}
-static INTVAL find_not_whitespace(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+find_not_whitespace(Interp *interpreter, STRING *source_string,
+ UINTVAL offset)
+{
return find_not_thing(interpreter, source_string, offset, WHITESPACE);
}
-static INTVAL is_digit(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+is_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
UINTVAL codepoint;
codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
return (typetable[codepoint] == DIGIT);
}
-static INTVAL find_digit(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+find_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
return find_thing(interpreter, source_string, offset, DIGIT);
}
-static INTVAL find_not_digit(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+find_not_digit(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
return find_not_thing(interpreter, source_string, offset, DIGIT);
}
-static INTVAL is_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+is_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
UINTVAL codepoint;
codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
return (typetable[codepoint] == PUNCTUATION);
}
-static INTVAL find_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+find_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
return find_thing(interpreter, source_string, offset, PUNCTUATION);
}
-static INTVAL find_not_punctuation(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+find_not_punctuation(Interp *interpreter, STRING *source_string,
+ UINTVAL offset)
+{
return find_not_thing(interpreter, source_string, offset, PUNCTUATION);
}
-static INTVAL is_newline(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+is_newline(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
UINTVAL codepoint;
codepoint = ENCODING_GET_CODEPOINT(interpreter, source_string, offset);
return codepoint == 13;
}
-static INTVAL find_newline(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+find_newline(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
return -1;
}
-static INTVAL find_not_newline(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+find_not_newline(Interp *interpreter, STRING *source_string, UINTVAL offset)
+{
return offset;
}
-static INTVAL find_word_boundary(Interp *interpreter, STRING *source_string, UINTVAL offset) {

+static INTVAL
+find_word_boundary(Interp *interpreter, STRING *source_string, UINTVAL offset)

+{
return -1;
}
-static STRING *string_from_codepoint(Interp *interpreter, UINTVAL codepoint) {

+static STRING *
+string_from_codepoint(Interp *interpreter, UINTVAL codepoint)
+{
STRING *return_string = NULL;
char real_codepoint = codepoint;
- return_string = string_make(interpreter, &real_codepoint, 1, "iso-8859-1", 0);

+ return_string = string_make(interpreter, &real_codepoint, 1,
+ "iso-8859-1", 0);
return return_string;
}
-static size_t compute_hash(Interp *interpreter, STRING *source_string) {
+static size_t
+compute_hash(Interp *interpreter, STRING *source_string)
+{
size_t hashval = 0;
char *buffptr = (char *)source_string->strstart;
- UINTVAL len = source_string->strlen;
+ UINTVAL len = source_string->strlen;
- while (len--) {
+ while (len--) {
hashval += hashval << 5;
hashval += *buffptr++;
}
return hashval;
}
-CHARSET *Parrot_charset_iso_8859_1_init(Interp *interpreter) {
- CHARSET *return_set = Parrot_new_charset(interpreter);
- CHARSET base_set = {
- "iso-8859-1",
- get_graphemes,
- get_graphemes_inplace,
- set_graphemes,
- to_charset,
- copy_to_charset,
- to_unicode,
- from_charset,
- from_unicode,
- compose,
- decompose,
- upcase,
- downcase,
- titlecase,
- upcase_first,
- downcase_first,
- titlecase_first,
- compare,
- cs_index,
- cs_rindex,
- validate,
- is_wordchar,
- find_wordchar,
- find_not_wordchar,
- is_whitespace,
- find_whitespace,
- find_not_whitespace,
- is_digit,
- find_digit,
- find_not_digit,
- is_punctuation,
- find_punctuation,
- find_not_punctuation,
- is_newline,
- find_newline,
- find_not_newline,
- find_word_boundary,
- string_from_codepoint,
- compute_hash,
- {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}

-
- };
-
- /* Snag the global. This is... bad. Should be properly fixed at some
- point */
- preferred_encoding = Parrot_fixed_8_encoding_ptr;
-
-/* preferred_encoding = Parrot_load_encoding(interpreter, "fixed_8"); */
-
- memcpy(return_set, &base_set, sizeof(CHARSET));
- Parrot_register_charset(interpreter, "iso-8859-1", return_set);
- return return_set;
-
+CHARSET *
+Parrot_charset_iso_8859_1_init(Interp *interpreter)
+{
+ CHARSET *return_set = Parrot_new_charset(interpreter);
+ CHARSET base_set = {
+ "iso-8859-1",
+ get_graphemes,
+ get_graphemes_inplace,
+ set_graphemes,
+ to_charset,
+ copy_to_charset,
+ to_unicode,
+ from_charset,
+ from_unicode,
+ compose,
+ decompose,
+ upcase,
+ downcase,
+ titlecase,
+ upcase_first,
+ downcase_first,
+ titlecase_first,
+ compare,
+ cs_index,
+ cs_rindex,
+ validate,
+ is_wordchar,
+ find_wordchar,
+ find_not_wordchar,
+ is_whitespace,
+ find_whitespace,
+ find_not_whitespace,
+ is_digit,
+ find_digit,
+ find_not_digit,
+ is_punctuation,
+ find_punctuation,
+ find_not_punctuation,
+ is_newline,
+ find_newline,
+ find_not_newline,
+ find_word_boundary,
+ string_from_codepoint,
+ compute_hash,
+ {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}
+
+ };
+
+ /* Snag the global. This is... bad. Should be properly fixed at some
+ point */
+ preferred_encoding = Parrot_fixed_8_encoding_ptr;
+
+ /* preferred_encoding = Parrot_load_encoding(interpreter, "fixed_8"); */

+
+ memcpy(return_set, &base_set, sizeof(CHARSET));
+ Parrot_register_charset(interpreter, "iso-8859-1", return_set);
+ return return_set;
}
/*
1.4 +2 -2 parrot/charset/iso-8859-1.h
Index: iso-8859-1.h
===================================================================
RCS file: /cvs/public/parrot/charset/iso-8859-1.h,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- iso-8859-1.h 27 Feb 2005 09:58:40 -0000 1.3
+++ iso-8859-1.h 27 Feb 2005 11:03:38 -0000 1.4
@@ -1,7 +1,7 @@
/* iso_8859_1.h
* Copyright: 2004 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: iso-8859-1.h,v 1.3 2005/02/27 09:58:40 leo Exp $
+ * $Id: iso-8859-1.h,v 1.4 2005/02/27 11:03:38 leo Exp $
* Overview:
* This is the header for the iso_8859-1 charset functions
* Data Structure and Algorithms:
@@ -28,7 +28,7 @@
static void titlecase_first(Interp *interpreter, STRING *source_string);
static INTVAL compare(Interp *interpreter, STRING *lhs, STRING *rhs);
static INTVAL cs_index(Interp *interpreter, const STRING *source_string, const STRING *search_string, UINTVAL offset);

-static INTVAL cs_rindex(Interp *interpreter, STRING *source_string, STRING *search_string, UINTVAL offset);

+static INTVAL cs_rindex(Interp *interpreter, const STRING *source_string, const STRING *search_string, UINTVAL offset);

static UINTVAL validate(Interp *interpreter, STRING *source_string);
static INTVAL is_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset);

static INTVAL find_wordchar(Interp *interpreter, STRING *source_string, UINTVAL offset);

1.5 +48 -48 parrot/include/parrot/charset.h
Index: charset.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/charset.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- charset.h 27 Feb 2005 09:58:43 -0000 1.4
+++ charset.h 27 Feb 2005 11:03:40 -0000 1.5
@@ -1,7 +1,7 @@
/* charset.h
* Copyright: 2004 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: charset.h,v 1.4 2005/02/27 09:58:43 leo Exp $
+ * $Id: charset.h,v 1.5 2005/02/27 11:03:40 leo Exp $
* Overview:
* This is the header for the 8-bit fixed-width encoding
* Data Structure and Algorithms:
@@ -31,53 +31,53 @@
#define PARROT_BINARY_CHARSET Parrot_binary_charset
#define PARROT_UNICODE_CHARSET Parrot_unicode_charset_ptr
-typedef STRING *(*charset_get_graphemes_t)(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL count);

-typedef STRING *(*charset_get_graphemes_inplace_t)(Interp *interpreter, STRING *source_string, STRING *dest_string, UINTVAL offset, UINTVAL count);

-typedef void (*charset_set_graphemes_t)(Interp *interpreter, STRING *source_string, UINTVAL offset, UINTVAL replace_count, STRING *insert_string);

-typedef void (*charset_to_charset_t)(Interp *interpreter, STRING *source_string, CHARSET *new_charset);

-typedef STRING *(*charset_copy_to_charset_t)(Interp *interpreter, STRING *source_string, CHARSET *new_charset);

-typedef void (*charset_to_unicode_t)(Interp *interpreter, STRING *source_string);

-typedef void (*charset_from_charset_t)(Interp *interpreter, STRING *source_string);

-typedef void (*charset_from_unicode_t)(Interp *interpreter, STRING *source_string);

-typedef void (*charset_compose_t)(Interp *interpreter, STRING *source_string);

-typedef void (*charset_decompose_t)(Interp *interpreter, STRING *source_string);

-typedef void (*charset_upcase_t)(Interp *interpreter, STRING *source_string);

-typedef void (*charset_downcase_t)(Interp *interpreter, STRING *source_string);

-typedef void (*charset_titlecase_t)(Interp *interpreter, STRING *source_string);

-typedef void (*charset_upcase_first_t)(Interp *interpreter, STRING *source_string);

-typedef void (*charset_downcase_first_t)(Interp *interpreter, STRING *source_string);

-typedef void (*charset_titlecase_first_t)(Interp *interpreter, STRING *source_string);

-typedef INTVAL (*charset_compare_t)(Interp *interpreter, STRING *lhs, STRING *rhs);

-typedef INTVAL (*charset_index_t)(Interp *interpreter, const STRING *source_string, const STRING *search_string, UINTVAL offset);

-typedef INTVAL (*charset_rindex_t)(Interp *interpreter, STRING *source_string, STRING *search_string, UINTVAL offset);

-typedef UINTVAL (*charset_validate_t)(Interp *interpreter, STRING *source_string);

-typedef INTVAL (*charset_is_wordchar_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_find_wordchar_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_find_not_wordchar_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_is_whitespace_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_find_whitespace_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_find_not_whitespace_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_is_digit_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_find_digit_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_find_not_digit_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_is_punctuation_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_find_punctuation_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_find_not_punctuation_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_is_newline_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_find_newline_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_find_not_newline_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef INTVAL (*charset_find_word_boundary_t)(Interp *interpreter, STRING *source_string, UINTVAL offset);

-typedef STRING *(*charset_string_from_codepoint_t)(Interp *interpreter, UINTVAL codepoint);

-typedef size_t (*charset_compute_hash_t)(Interp *interpreter, STRING *source_string);

-
-CHARSET *Parrot_new_charset(Interp *interpreter);
-CHARSET *Parrot_load_charset(Interp *interpreter, const char *charsetname);
-CHARSET *Parrot_find_charset(Interp *interpreter, const char *charsetname);
-INTVAL Parrot_register_charset(Interp *interpreter, const char *charsetname, CHARSET *charset);

-INTVAL Parrot_make_default_charset(Interp *interpreter, const char *charsetname, CHARSET *charset);

-CHARSET *Parrot_default_charset(Interp *interpreter);
-typedef INTVAL (*charset_converter_t)(Interp *interpreter, CHARSET *lhs, CHARSET *rhs);

-charset_converter_t Parrot_find_charset_converter(Interp *interpreter, CHARSET *lhs, CHARSET *rhs);

+typedef STRING *(*charset_get_graphemes_t)(Interp *, STRING *source_string, UINTVAL offset, UINTVAL count);

+typedef STRING *(*charset_get_graphemes_inplace_t)(Interp *, STRING *source_string, STRING *dest_string, UINTVAL offset, UINTVAL count);

+typedef void (*charset_set_graphemes_t)(Interp *, STRING *source_string, UINTVAL offset, UINTVAL replace_count, STRING *insert_string);

+typedef void (*charset_to_charset_t)(Interp *, STRING *source_string, CHARSET *new_charset);

+typedef STRING *(*charset_copy_to_charset_t)(Interp *, STRING *source_string, CHARSET *new_charset);

+typedef void (*charset_to_unicode_t)(Interp *, STRING *source_string);
+typedef void (*charset_from_charset_t)(Interp *, STRING *source_string);
+typedef void (*charset_from_unicode_t)(Interp *, STRING *source_string);
+typedef void (*charset_compose_t)(Interp *, STRING *source_string);
+typedef void (*charset_decompose_t)(Interp *, STRING *source_string);
+typedef void (*charset_upcase_t)(Interp *, STRING *source_string);
+typedef void (*charset_downcase_t)(Interp *, STRING *source_string);
+typedef void (*charset_titlecase_t)(Interp *, STRING *source_string);
+typedef void (*charset_upcase_first_t)(Interp *, STRING *source_string);
+typedef void (*charset_downcase_first_t)(Interp *, STRING *source_string);
+typedef void (*charset_titlecase_first_t)(Interp *, STRING *source_string);
+typedef INTVAL (*charset_compare_t)(Interp *, STRING *lhs, STRING *rhs);
+typedef INTVAL (*charset_index_t)(Interp *, const STRING *source_string, const STRING *search_string, UINTVAL offset);

+typedef INTVAL (*charset_rindex_t)(Interp *, const STRING *source_string, const STRING *search_string, UINTVAL offset);

+typedef UINTVAL (*charset_validate_t)(Interp *, STRING *source_string);
+typedef INTVAL (*charset_is_wordchar_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_find_wordchar_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_find_not_wordchar_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_is_whitespace_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_find_whitespace_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_find_not_whitespace_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_is_digit_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_find_digit_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_find_not_digit_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_is_punctuation_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_find_punctuation_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_find_not_punctuation_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_is_newline_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_find_newline_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_find_not_newline_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef INTVAL (*charset_find_word_boundary_t)(Interp *, STRING *source_string, UINTVAL offset);

+typedef STRING *(*charset_string_from_codepoint_t)(Interp *, UINTVAL codepoint);

+typedef size_t (*charset_compute_hash_t)(Interp *, STRING *source_string);
+
+CHARSET *Parrot_new_charset(Interp *);
+CHARSET *Parrot_load_charset(Interp *, const char *charsetname);
+CHARSET *Parrot_find_charset(Interp *, const char *charsetname);
+INTVAL Parrot_register_charset(Interp *, const char *charsetname, CHARSET *charset);

+INTVAL Parrot_make_default_charset(Interp *, const char *charsetname, CHARSET *charset);

+CHARSET *Parrot_default_charset(Interp *);
+typedef INTVAL (*charset_converter_t)(Interp *, CHARSET *lhs, CHARSET *rhs);
+charset_converter_t Parrot_find_charset_converter(Interp *, CHARSET *lhs, CHARSET *rhs);

struct _charset {
const char *name;
1.24 +5 -3 parrot/include/parrot/misc.h
Index: misc.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/misc.h,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -r1.23 -r1.24
--- misc.h 27 Feb 2005 09:58:43 -0000 1.23
+++ misc.h 27 Feb 2005 11:03:40 -0000 1.24
@@ -1,7 +1,7 @@
/* misc.h
* Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
* CVS Info
- * $Id: misc.h,v 1.23 2005/02/27 09:58:43 leo Exp $
+ * $Id: misc.h,v 1.24 2005/02/27 11:03:40 leo Exp $
* Overview:
* Miscellaneous functions, mainly the Parrot_sprintf family
* Data Structure and Algorithms:
@@ -41,8 +41,10 @@
void Parrot_destroy_la(long *);
void Parrot_destroy_cpa(char **);
PMC* tm_to_array(Parrot_Interp interpreter, struct tm *tm);
-INTVAL Parrot_byte_index(Interp *interpreter, STRING *base, STRING *search, UINTVAL start_offset);

-INTVAL Parrot_byte_rindex(Interp *interpreter, STRING *base, STRING *search, UINTVAL start_offset);

+INTVAL Parrot_byte_index(Interp *interpreter, const STRING *base,
+ const STRING *search, UINTVAL start_offset);
+INTVAL Parrot_byte_rindex(Interp *interpreter, const STRING *base,
+ const STRING *search, UINTVAL start_offset);
/*
* misc.c
*/
1.4 +24 -9 parrot/src/charset.c
Index: charset.c
===================================================================
RCS file: /cvs/public/parrot/src/charset.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- charset.c 27 Feb 2005 09:58:47 -0000 1.3
+++ charset.c 27 Feb 2005 11:03:43 -0000 1.4
@@ -1,6 +1,6 @@
/*
Copyright: 2004 The Perl Foundation. All Rights Reserved.
-$Id: charset.c,v 1.3 2005/02/27 09:58:47 leo Exp $
+$Id: charset.c,v 1.4 2005/02/27 11:03:43 leo Exp $
=head1 NAME
@@ -20,12 +20,16 @@
CHARSET *Parrot_default_charset_ptr;
CHARSET *Parrot_unicode_charset_ptr;
-CHARSET *Parrot_new_charset(Interp *interpreter) {
+CHARSET *
+Parrot_new_charset(Interp *interpreter)
+{
return mem_sys_allocate(sizeof(CHARSET));
}
-CHARSET *Parrot_find_charset(Interp *interpreter, const char *charsetname) {
+CHARSET *
+Parrot_find_charset(Interp *interpreter, const char *charsetname)
+{
if (!strcmp("iso-8859-1", charsetname)) {
return Parrot_iso_8859_1_charset_ptr;
}
@@ -38,12 +42,17 @@
return NULL;
}
-CHARSET *Parrot_load_charset(Interp *interpreter, const char *charsetname) {
+CHARSET *
+Parrot_load_charset(Interp *interpreter, const char *charsetname)
+{
internal_exception(UNIMPLEMENTED, "Can't load charsets yet");
return NULL;
}
-INTVAL Parrot_register_charset(Interp *interpreter, const char *charsetname, CHARSET *charset) {

+INTVAL
+Parrot_register_charset(Interp *interpreter, const char *charsetname,
+ CHARSET *charset)
+{
if (!strcmp("binary", charsetname)) {
Parrot_binary_charset_ptr = charset;
return 1;
@@ -52,7 +61,6 @@
Parrot_iso_8859_1_charset_ptr = charset;
if (!Parrot_default_charset_ptr) {
Parrot_default_charset_ptr = charset;
-
}
return 1;
}
@@ -63,16 +71,23 @@
return 0;
}
-INTVAL Parrot_make_default_charset(Interp *interpreter, const char *charsetname, CHARSET *charset) {

+INTVAL
+Parrot_make_default_charset(Interp *interpreter, const char *charsetname,
+ CHARSET *charset)
+{
Parrot_default_charset_ptr = charset;
return 1;
}
-CHARSET *Parrot_default_charset(Interp *interpreter) {
+CHARSET *
+Parrot_default_charset(Interp *interpreter)
+{
return Parrot_default_charset_ptr;
}
-charset_converter_t Parrot_find_charset_converter(Interp *interpreter, CHARSET *lhs, CHARSET *rhs) {

+charset_converter_t
+Parrot_find_charset_converter(Interp *interpreter, CHARSET *lhs, CHARSET *rhs)

+{
return NULL;
}
1.18 +13 -7 parrot/src/utils.c
Index: utils.c
===================================================================
RCS file: /cvs/public/parrot/src/utils.c,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -r1.17 -r1.18
--- utils.c 27 Feb 2005 09:58:47 -0000 1.17
+++ utils.c 27 Feb 2005 11:03:43 -0000 1.18
@@ -1,6 +1,6 @@
/*
Copyright: 2001-2003 The Perl Foundation. All Rights Reserved.
-$Id: utils.c,v 1.17 2005/02/27 09:58:47 leo Exp $
+$Id: utils.c,v 1.18 2005/02/27 11:03:43 leo Exp $
=head1 NAME
@@ -643,7 +643,9 @@
}
INTVAL
-Parrot_byte_index(Interp *interpreter, STRING *base, STRING *search, UINTVAL start_offset) {

+Parrot_byte_index(Interp *interpreter, const STRING *base,
+ const STRING *search, UINTVAL start_offset)
+{
INTVAL pos = -1;
char *base_start, *search_start, *current_pos;
INTVAL current_offset;
@@ -654,14 +656,15 @@
max_possible_offset = (base->strlen - search->strlen);
checkloop:
- for (current_offset = start_offset; current_offset <= max_possible_offset; current_offset++) {

+ for (current_offset = start_offset; current_offset <= max_possible_offset;

+ current_offset++) {
base_start = (char *)base->strstart + current_offset;
if (!memcmp(base_start, search_start, searchlen)) {
found = 1;
break;
}
}
-
+
if (found) {
pos = current_offset;
}
@@ -669,7 +672,9 @@
}
INTVAL
-Parrot_byte_rindex(Interp *interpreter, STRING *base, STRING *search, UINTVAL start_offset) {

+Parrot_byte_rindex(Interp *interpreter, const STRING *base,
+ const STRING *search, UINTVAL start_offset)
+{
INTVAL pos = -1;
char *base_start, *search_start, *current_pos;
INTVAL current_offset;
@@ -682,14 +687,15 @@
max_possible_offset = start_offset;
}
checkloop:
- for (current_offset = max_possible_offset; current_offset >= 0; current_offset--) {

+ for (current_offset = max_possible_offset; current_offset >= 0;
+ current_offset--) {
base_start = (char *)base->strstart + current_offset;
if (!memcmp(base_start, search_start, searchlen)) {
found = 1;
break;
}
}
-
+
if (found) {
pos = current_offset;
}