diff -r -c ../../inform63/asm.c ./asm.c *** ../../inform63/asm.c Fri Feb 27 06:30:00 2004 --- ./asm.c Thu Dec 16 20:21:02 2004 *************** *** 39,44 **** --- 39,47 ---- int sequence_point_follows; /* Will the next instruction assembled */ /* be at a sequence point in the routine? */ + int uses_unicode_features; /* Makes use of Glulx Unicode (3.0) + features? */ + dbgl debug_line_ref; /* Source code ref of current statement */ *************** *** 552,557 **** --- 555,561 ---- { (uchar *) "callfi", 0x0161, St, 0, 3 }, { (uchar *) "callfii", 0x0162, St, 0, 4 }, { (uchar *) "callfiii", 0x0163, St, 0, 5 }, + { (uchar *) "streamunichar", 0x73, 0, 0, 1 }, }; static opcodeg custom_opcode_g; *************** *** 976,981 **** --- 980,989 ---- execution_never_reaches_here = ((opco.flags & Rf) != 0); + if (AI->internal_number == streamunichar_gc) { + uses_unicode_features = TRUE; + } + no_operands_given = AI->operand_count; /* 1. Write the opcode byte(s) */ *************** *** 2826,2831 **** --- 2834,2841 ---- { int i; for (i=0;i<16;i++) flags2_requirements[i]=0; + + uses_unicode_features = FALSE; sequence_point_follows = TRUE; label_moved_error_already_given = FALSE; diff -r -c ../../inform63/chars.c ./chars.c *** ../../inform63/chars.c Fri Feb 27 06:30:00 2004 --- ./chars.c Thu Dec 16 23:36:47 2004 *************** *** 41,46 **** --- 41,47 ---- /* character_set_setting source_to_iso_grid[] */ /* zscii_to_unicode_grid[] */ /* zscii_to_iso_grid[] */ + /* iso_to_unicode_grid[] */ /* alphabet[][] iso_to_alphabet_grid[] */ /* zscii_to_alphabet_grid[] */ /* zscii_to_unicode_grid[] iso_to_alphabet_grid[] */ *************** *** 73,78 **** --- 74,81 ---- uchar source_to_iso_grid[0x100]; /* Filters source code into legal ISO */ + int32 iso_to_unicode_grid[0x100]; /* Filters ISO into Unicode */ + int character_digit_value[128]; /* Parsing of binary, decimal and hex */ static char *accents = /* Standard 0.2 stock of accented... */ *************** *** 171,176 **** --- 174,180 ---- zscii_to_iso_grid[z] = j; iso_to_alphabet_grid[j] = -z; } + iso_to_unicode_grid[j] = iso_to_unicode(j); } for (j=0; j<3; j++) for (k=(j<2?0:1); k<26; k++) diff -r -c ../../inform63/files.c ./files.c *** ../../inform63/files.c Fri Feb 27 06:30:00 2004 --- ./files.c Thu Dec 16 23:12:53 2004 *************** *** 242,247 **** --- 242,255 ---- sf_put('\0'); (*size) += 1; break; + case 4: + val = unicode_usage_entries[ent->u.val].ch; + sf_put((val >> 24) & 0xFF); + sf_put((val >> 16) & 0xFF); + sf_put((val >> 8) & 0xFF); + sf_put((val) & 0xFF); + (*size) += 4; + break; case 9: val = abbreviations_offset + 4 + ent->u.val*4; sf_put((val >> 24) & 0xFF); *************** *** 469,474 **** --- 477,483 ---- static void output_file_g(void) { FILE *fin; char new_name[PATHLEN]; int32 size, i, j; + int32 val; ASSERT_GLULX(); *************** *** 498,508 **** sf_put('l'); sf_put('u'); sf_put('l'); ! /* Version number -- 0x00020000 for now. */ ! sf_put(0x00); ! sf_put(0x02); ! sf_put(0x00); ! sf_put(0x00); /* RAMSTART */ sf_put((Write_RAM_At >> 24)); sf_put((Write_RAM_At >> 16)); --- 507,521 ---- sf_put('l'); sf_put('u'); sf_put('l'); ! /* Version number. */ ! if (no_unicode_chars == 0 && (!uses_unicode_features)) ! val = 0x00020000; ! else ! val = 0x00030000; ! sf_put((val >> 24)); ! sf_put((val >> 16)); ! sf_put((val >> 8)); ! sf_put((val)); /* RAMSTART */ sf_put((Write_RAM_At >> 24)); sf_put((Write_RAM_At >> 16)); *************** *** 750,756 **** else if (ch == '0') { ch = '\0'; } ! else if (ch == 'A' || ch == 'D') { escapelen = 4; escapetype = ch; escapeval = 0; --- 763,769 ---- else if (ch == '0') { ch = '\0'; } ! else if (ch == 'A' || ch == 'D' || ch == 'U') { escapelen = 4; escapetype = ch; escapeval = 0; *************** *** 769,774 **** --- 782,790 ---- } else if (escapetype == 'D') { ch = huff_dynam_start+escapeval; + } + else if (escapetype == 'U') { + ch = huff_unicode_start+escapeval; } else { compiler_error("Strange @ escape in processed text."); diff -r -c ../../inform63/header.h ./header.h *** ../../inform63/header.h Wed Mar 3 00:08:55 2004 --- ./header.h Thu Dec 16 23:37:18 2004 *************** *** 1117,1122 **** --- 1117,1123 ---- #define callfi_gc 77 #define callfii_gc 78 #define callfiii_gc 79 + #define streamunichar_gc 80 #define SYMBOL_TT 0 /* value = index in symbol table */ #define NUMBER_TT 1 /* value = the number */ *************** *** 1993,1998 **** --- 1994,2000 ---- extern int32 no_instructions; extern int sequence_point_follows; + extern int uses_unicode_features; extern dbgl debug_line_ref; extern int execution_never_reaches_here; extern int *variable_usage; *************** *** 2111,2116 **** --- 2113,2119 ---- /* ------------------------------------------------------------------------- */ extern uchar source_to_iso_grid[]; + extern int32 iso_to_unicode_grid[]; extern int character_digit_value[]; extern uchar alphabet[3][27]; extern int alphabet_modified; *************** *** 2380,2386 **** extern int32 MAX_STATIC_STRINGS, MAX_ZCODE_SIZE, MAX_LINK_DATA_SIZE, MAX_TRANSCRIPT_SIZE, MAX_INDIV_PROP_TABLE_SIZE, ! MAX_NUM_STATIC_STRINGS; extern int32 MAX_OBJ_PROP_COUNT, MAX_OBJ_PROP_TABLE_SIZE; extern int MAX_LOCAL_VARIABLES, MAX_GLOBAL_VARIABLES; --- 2383,2389 ---- extern int32 MAX_STATIC_STRINGS, MAX_ZCODE_SIZE, MAX_LINK_DATA_SIZE, MAX_TRANSCRIPT_SIZE, MAX_INDIV_PROP_TABLE_SIZE, ! MAX_NUM_STATIC_STRINGS, MAX_UNICODE_CHARS; extern int32 MAX_OBJ_PROP_COUNT, MAX_OBJ_PROP_TABLE_SIZE; extern int MAX_LOCAL_VARIABLES, MAX_GLOBAL_VARIABLES; *************** *** 2543,2551 **** --- 2546,2563 ---- compression. */ extern int32 no_strings, no_dynamic_strings; + extern int no_unicode_chars; #define MAX_DYNAMIC_STRINGS (64) + typedef struct unicode_usage_s unicode_usage_t; + struct unicode_usage_s { + int32 ch; + unicode_usage_t *next; + }; + + extern unicode_usage_t *unicode_usage_entries; + /* This is the maximum number of (8-bit) bytes that can encode a single Huffman entity. Four should be plenty, unless someone starts encoding an ideographic language. */ *************** *** 2571,2577 **** extern int32 compression_table_size, compression_string_size; extern int32 *compressed_offsets; ! extern int no_huff_entities, huff_abbrev_start, huff_dynam_start; extern int huff_entity_root; extern void compress_game_text(void); --- 2583,2590 ---- extern int32 compression_table_size, compression_string_size; extern int32 *compressed_offsets; ! extern int no_huff_entities; ! extern int huff_abbrev_start, huff_dynam_start, huff_unicode_start; extern int huff_entity_root; extern void compress_game_text(void); diff -r -c ../../inform63/lexer.c ./lexer.c *** ../../inform63/lexer.c Fri Feb 27 06:30:00 2004 --- ./lexer.c Mon Dec 13 20:43:10 2004 *************** *** 264,269 **** --- 264,270 ---- "glk", "getstringtbl", "setstringtbl", "getiosys", "setiosys", "linearsearch", "binarysearch", "linkedsearch", "callf", "callfi", "callfii", "callfiii", + "streamunichar", "" }; diff -r -c ../../inform63/memory.c ./memory.c *** ../../inform63/memory.c Fri Feb 27 06:30:00 2004 --- ./memory.c Tue Dec 14 20:18:22 2004 *************** *** 174,179 **** --- 174,180 ---- int DICT_WORD_SIZE; int NUM_ATTR_BYTES; int32 MAX_NUM_STATIC_STRINGS; + int32 MAX_UNICODE_CHARS; /* The way memory sizes are set causes great nuisance for those parameters which have different defaults under Z-code and Glulx. We have to get *************** *** 229,234 **** --- 230,237 ---- printf("| %25s = %-7d |\n","SYMBOLS_CHUNK_SIZE",SYMBOLS_CHUNK_SIZE); printf("| %25s = %-7ld |\n","MAX_TRANSCRIPT_SIZE", (long int) MAX_TRANSCRIPT_SIZE); + printf("| %25s = %-7ld |\n","MAX_UNICODE_CHARS", + (long int) MAX_UNICODE_CHARS); printf("| %25s = %-7d |\n","MAX_VERBS",MAX_VERBS); printf("| %25s = %-7d |\n","MAX_VERBSPACE",MAX_VERBSPACE); printf("| %25s = %-7ld |\n","MAX_ZCODE_SIZE", *************** *** 392,397 **** --- 395,401 ---- DICT_WORD_SIZE_g = 9; NUM_ATTR_BYTES_z = 6; NUM_ATTR_BYTES_g = 7; + MAX_UNICODE_CHARS = 64; adjust_memory_sizes(); } *************** *** 552,558 **** Inform automatically ensures that this is at least twice the size of\n\ MAX_QTEXT_SIZE, to be on the safe side."); return; - } if (strcmp(command,"MAX_ZCODE_SIZE")==0) { --- 556,561 ---- *************** *** 644,650 **** " MAX_NUM_STATIC_STRINGS is the maximum number of compiled strings \n\ allowed in the program. (Glulx only)\n"); return; ! } printf("No such memory setting as \"%s\"\n",command); --- 647,660 ---- " MAX_NUM_STATIC_STRINGS is the maximum number of compiled strings \n\ allowed in the program. (Glulx only)\n"); return; ! } ! if (strcmp(command,"MAX_UNICODE_CHARS")==0) ! { ! printf( ! " MAX_UNICODE_CHARS is the maximum number of different Unicode characters \n\ ! (beyond the Latin-1 range, $00..$FF) which the game text can use. \n\ ! (Glulx only)\n"); ! return; } printf("No such memory setting as \"%s\"\n",command); *************** *** 779,784 **** --- 789,796 ---- { MAX_GLOBAL_VARIABLES=j, flag=1; MAX_GLOBAL_VARIABLES_g=MAX_GLOBAL_VARIABLES_z=j; } + if (strcmp(command,"MAX_UNICODE_CHARS")==0) + MAX_UNICODE_CHARS=j, flag=1; if (flag==0) printf("No such memory setting as \"%s\"\n", command); diff -r -c ../../inform63/text.c ./text.c *** ../../inform63/text.c Fri Feb 27 06:30:00 2004 --- ./text.c Thu Dec 16 23:46:03 2004 *************** *** 52,57 **** --- 52,59 ---- int no_dynamic_strings; /* No. of @.. string escapes used (actually, the highest value used plus one) */ + int no_unicode_chars; /* Number of distinct Unicode chars + used. (Beyond 0xFF.) */ static int MAX_CHARACTER_SET; /* Number of possible entities */ huffentity_t *huff_entities; /* The list of entities (characters, *************** *** 60,65 **** --- 62,69 ---- static huffentity_t **hufflist; /* Copy of the list, for sorting */ int no_huff_entities; /* The number of entities in the list */ + int huff_unicode_start; /* Position in the list where Unicode + chars begin. */ int huff_abbrev_start; /* Position in the list where string abbreviations begin. */ int huff_dynam_start; /* Position in the list where @.. *************** *** 78,83 **** --- 82,93 ---- of the Huffman table. (So entry 0 is equal to compression_table_size)*/ + #define UNICODE_HASH_BUCKETS (64) + unicode_usage_t *unicode_usage_entries; + static unicode_usage_t *unicode_usage_hash[UNICODE_HASH_BUCKETS]; + + static int unicode_entity_index(int32 unicode); + /* ------------------------------------------------------------------------- */ /* Abbreviation arrays */ /* ------------------------------------------------------------------------- */ *************** *** 537,542 **** --- 547,553 ---- "@0" to indicate a zero byte, "@ANNNN" to indicate an abbreviation, "@DNNNN" to indicate a dynamic string thing. + "@UNNNN" to indicate a four-byte Unicode value (0x100 or higher). (NNNN is a four-digit hex number using the letters A-P... an ugly representation but a convenient one.) */ *************** *** 623,629 **** write_z_char_g(unicode); } else { ! error("Unicode characters beyond Latin-1 are not yet supported in Glulx"); } } } --- 634,653 ---- write_z_char_g(unicode); } else { ! if (!compression_switch) { ! warning("Unicode characters will not work in non-compressed \ ! string; substituting '?'."); ! write_z_char_g('?'); ! } ! else { ! j = unicode_entity_index(unicode); ! write_z_char_g('@'); ! write_z_char_g('U'); ! write_z_char_g('A' + ((j >>12) & 0x0F)); ! write_z_char_g('A' + ((j >> 8) & 0x0F)); ! write_z_char_g('A' + ((j >> 4) & 0x0F)); ! write_z_char_g('A' + ((j ) & 0x0F)); ! } } } } *************** *** 631,638 **** write_z_char_g(0x0A); else if (text_in[i] == '~') write_z_char_g('"'); ! else ! write_z_char_g(text_in[i]); } write_z_char_g(0); --- 655,682 ---- write_z_char_g(0x0A); else if (text_in[i] == '~') write_z_char_g('"'); ! else { ! unicode = iso_to_unicode_grid[text_in[i]]; ! if (unicode >= 0 && unicode < 256) { ! write_z_char_g(unicode); ! } ! else { ! if (!compression_switch) { ! warning("Unicode characters will not work in non-compressed \ ! string; substituting '?'."); ! write_z_char_g('?'); ! } ! else { ! j = unicode_entity_index(unicode); ! write_z_char_g('@'); ! write_z_char_g('U'); ! write_z_char_g('A' + ((j >>12) & 0x0F)); ! write_z_char_g('A' + ((j >> 8) & 0x0F)); ! write_z_char_g('A' + ((j >> 4) & 0x0F)); ! write_z_char_g('A' + ((j ) & 0x0F)); ! } ! } ! } } write_z_char_g(0); *************** *** 641,646 **** --- 685,721 ---- return((uchar *) text_out_pc); } + static int unicode_entity_index(int32 unicode) + { + unicode_usage_t *uptr; + int j; + int buck = unicode % UNICODE_HASH_BUCKETS; + + for (uptr = unicode_usage_hash[buck]; uptr; uptr=uptr->next) { + if (uptr->ch == unicode) + break; + } + if (uptr) { + j = (uptr - unicode_usage_entries); + } + else { + if (no_unicode_chars >= MAX_UNICODE_CHARS) { + memoryerror("MAX_UNICODE_CHARS", MAX_UNICODE_CHARS); + j = 0; + } + else { + j = no_unicode_chars; + no_unicode_chars++; + uptr = unicode_usage_entries + j; + uptr->ch = unicode; + uptr->next = unicode_usage_hash[buck]; + unicode_usage_hash[buck] = uptr; + } + } + + return j; + } + /* ------------------------------------------------------------------------- */ /* Glulx compression code */ /* ------------------------------------------------------------------------- */ *************** *** 664,671 **** if (compression_switch) { /* How many entities have we currently got? Well, 256 plus the ! string-terminator plus abbrevations plus dynamic strings. */ entities = 256+1; huff_abbrev_start = entities; if (economy_switch) entities += no_abbreviations; --- 739,749 ---- if (compression_switch) { /* How many entities have we currently got? Well, 256 plus the ! string-terminator plus Unicode chars plus abbrevations plus ! dynamic strings. */ entities = 256+1; + huff_unicode_start = entities; + entities += no_unicode_chars; huff_abbrev_start = entities; if (economy_switch) entities += no_abbreviations; *************** *** 684,689 **** --- 762,772 ---- /* Terminator */ huff_entities[256].type = 1; huff_entities[256].count = 0; + for (jx=0; jx= huff_dynam_start) { compression_string_size += 3; } ! else if (ch >= huff_abbrev_start) { ! compiler_error("Abbreviation in non-compressed string should \ ! be impossible."); } else compression_string_size += 1; --- 1014,1022 ---- if (ch >= huff_dynam_start) { compression_string_size += 3; } ! else if (ch >= huff_unicode_start) { ! compiler_error("Abbreviation/Unicode in non-compressed string \ ! should be impossible."); } else compression_string_size += 1; *************** *** 973,978 **** --- 1063,1072 ---- cx = (char *)abbreviations_at + ent->u.val*MAX_ABBREV_LENGTH; printf("abbrev %d, \"%s\"\n", ent->u.val, cx); break; + case 4: + ix = ent->u.val; + printf("'U+%lX'\n", unicode_usage_entries[ix].ch); + break; case 9: printf("print-var @%02d\n", ent->u.val); break; *************** *** 1009,1014 **** --- 1103,1109 ---- cx = (char *)abbreviations_at + ent->u.val*MAX_ABBREV_LENGTH; compression_table_size += (1 + 1 + strlen(cx)); break; + case 4: case 9: compression_table_size += 5; break; *************** *** 2037,2042 **** --- 2132,2138 ---- static_strings_extent = 0; no_strings = 0; no_dynamic_strings = 0; + no_unicode_chars = 0; } /* Note: for allocation and deallocation of all_the_text, see inform.c */ *************** *** 2068,2073 **** --- 2164,2170 ---- huff_entities = NULL; hufflist = NULL; + unicode_usage_entries = NULL; done_compression = FALSE; compression_table_size = 0; compressed_offsets = NULL; *************** *** 2076,2086 **** if (glulx_mode) { if (compression_switch) { ! MAX_CHARACTER_SET = 257 + MAX_ABBREVS + MAX_DYNAMIC_STRINGS; huff_entities = my_calloc(sizeof(huffentity_t), MAX_CHARACTER_SET*2+1, "huffman entities"); hufflist = my_calloc(sizeof(huffentity_t *), MAX_CHARACTER_SET, "huffman node list"); } compressed_offsets = my_calloc(sizeof(int32), MAX_NUM_STATIC_STRINGS, "static strings index table"); --- 2173,2189 ---- if (glulx_mode) { if (compression_switch) { ! int ix; ! MAX_CHARACTER_SET = 257 + MAX_ABBREVS + MAX_DYNAMIC_STRINGS ! + MAX_UNICODE_CHARS; huff_entities = my_calloc(sizeof(huffentity_t), MAX_CHARACTER_SET*2+1, "huffman entities"); hufflist = my_calloc(sizeof(huffentity_t *), MAX_CHARACTER_SET, "huffman node list"); + unicode_usage_entries = my_calloc(sizeof(unicode_usage_t), + MAX_UNICODE_CHARS, "unicode entity entries"); + for (ix=0; ix