//
//  osxtads_char_mapping.m
//  XTads
//
//  Created by Rune Berg on 12/07/2020.
//  Copyright © 2020 Rune Berg. All rights reserved.
//

#import <Foundation/Foundation.h>
#import "osxtads_support.h"
#import "XTFileUtils.h"


// Forward declaration; the definition is at the bottom of this file.
// Used by os_xlat_html4() to replace selected non-Latin-1 code points
// with ASCII look-alikes.
unsigned int map_nonlatin1_unicode_char_for_xlat_html4(unsigned int unicode_char);

/*
 *   Generate a filename for a character-set mapping file.  This function
 *   should determine the current native character set in use, if
 *   possible, then generate a filename, according to system-specific
 *   conventions, that we should attempt to load to get a mapping between
 *   the current native character set and the internal character set
 *   identified by 'internal_id'.
 *
 *   The internal character set ID is a string of up to 4 characters.
 *
 *   On DOS, the native character set is a DOS code page.  DOS code pages
 *   are identified by 3- or 4-digit identifiers; for example, code page
 *   437 is the default US ASCII DOS code page.  We generate the
 *   character-set mapping filename by appending the internal character
 *   set identifier to the DOS code page number, then appending ".TCP" to
 *   the result.  So, to map between ISO Latin-1 (internal ID = "La1") and
 *   DOS code page 437, we would generate the filename "437La1.TCP".
 *
 *   Note that this function should do only two things.  First, determine
 *   the current native character set that's in use.  Second, generate a
 *   filename based on the current native code page and the internal ID.
 *   This function is NOT responsible for figuring out the mapping or
 *   anything like that -- it's simply where we generate the correct
 *   filename based on local convention.
 *
 *   'filename' is a buffer of at least OSFNMAX characters.
 *
 *   'argv0' is the executable filename from the original command line.
 *   This parameter is provided so that the system code can look for
 *   mapping files in the original TADS executables directory, if desired.
 */
/* QTads: Generate a filename for a character-set mapping file.
 *
 * Follow DOS convention: start with the current local charset
 * identifier, then the internal ID, and the suffix ".tcp".  QTads uses
 * no path prefix, which means look in current directory, because
 * mapping files are supposed to be distributed with a game, not with
 * the interpreter.
 *
 * XTads differs: the implementation below uses the internal ID for
 * both halves of the name (e.g. "La1La1.tcp") and prefixes the main
 * bundle's resource path.
 */
void
os_gen_charmap_filename( char* filename, char* internal_id, char* unused)
{
	// XTads uses Unicode/UTF-8 internally, but the T2 VM expects a single-byte to
	// single-byte mapping, so we hand it a mapping file that does nothing
	// (internal ID -> same internal ID). The real decoding is done later.
	//
	// filename:    out buffer of at least OSFNMAX chars; receives the fully qualified
	//              name, or an empty string if no name could be produced.
	// internal_id: internal character set ID (up to 4 characters).
	// unused:      ignored.

	if (filename == NULL) {
		return;
	}
	// Whatever happens below, the caller gets back a valid C string.
	filename[0] = '\0';
	if (internal_id == NULL) {
		return;
	}

	// Find the bare name of the mapping file: "<internal_id><internal_id>.tcp".
	// snprintf() writes from the start of the buffer (no dependence on its previous,
	// uninitialized contents) and never writes past its end.

	char tempFilename[OSFNMAX];
	int formattedLen = snprintf(tempFilename, sizeof(tempFilename), "%s%s.tcp", internal_id, internal_id);
	if (formattedLen < 0 || (size_t)formattedLen >= sizeof(tempFilename)) {
		// Formatting failed or the name would not fit.
		return;
	}
	NSUInteger tempFilenameLen = (NSUInteger)formattedLen;
	NSString *tempFilenameNSString = XTADS_FILESYSTEM_C_STRING_LEN_TO_NSSTRING(tempFilename, tempFilenameLen);

	// Find the fully qualified name of the mapping file (it lives among the app bundle's resources).

	NSBundle *mainBundle = [NSBundle mainBundle];
	NSString *mainBundleResourcePath = [mainBundle resourcePath];
	NSString *fqFilename = [NSString stringWithFormat:@"%@/%@", mainBundleResourcePath, tempFilenameNSString];

	// Fill in result buffer.

	const char *fqFilenameCString = XTADS_NSSTRING_TO_FILESYSTEM_C_STRING(fqFilename);
	if (fqFilenameCString == NULL) {
		// NOTE(review): assumes the conversion macro can yield NULL - confirm against its definition.
		return;
	}
	// Bound the copy by the destination size, not the source length: strncpy() then
	// zero-fills the remainder, and the explicit terminator covers an over-long source.
	strncpy(filename, fqFilenameCString, OSFNMAX - 1);
	filename[OSFNMAX - 1] = '\0';
}

/*
 *   Generate the name of the character set mapping table for Unicode
 *   characters to and from the given local character set.  Fills in the
 *   buffer with the implementation-dependent name of the desired
 *   character set map.  See below for the character set ID codes.
 *
 *   For example, on Windows, the implementation would obtain the
 *   appropriate active code page (which is simply a Windows character set
 *   identifier number) from the operating system, and build the name of
 *   the Unicode mapping file for that code page, such as "CP1252".  On
 *   Macintosh, the implementation would look up the current script system
 *   and return the name of the Unicode mapping for that script system,
 *   such as "ROMAN" or "CENTEURO".
 *
 *   If it is not possible to determine the specific character set that is
 *   in use, this function should return "asc7dflt" (ASCII 7-bit default)
 *   as the character set identifier on an ASCII system, or an appropriate
 *   base character set name on a non-ASCII system.  "asc7dflt" is the
 *   generic character set mapping for plain ASCII characters.
 *
 *   The given buffer must be at least 32 bytes long; the implementation
 *   must limit the result it stores to 32 bytes.  (We use a fixed-size
 *   buffer in this interface for simplicity, and because there seems no
 *   need for greater flexibility in the interface; a character set name
 *   doesn't carry very much information so shouldn't need to be very
 *   long.  Note that this function doesn't generate a filename, but
 *   simply a mapping name; in practice, a map name will be used to
 *   construct a mapping file name.)
 *
 *   Because this function obtains the Unicode mapping name, there is no
 *   need to specify the internal character set to be used: the internal
 *   character set is Unicode.
 */
/*
 *   Implementation note: when porting this routine, the convention that
 *   you use to name your mapping files is up to you.  You should simply
 *   choose a convention for this implementation, and then use the same
 *   convention for packaging the mapping files for your OS release.  In
 *   most cases, the best convention is to use the names that the Unicode
 *   consortium uses in their published cross-mapping listings, since
 *   these listings can be used as the basis of the mapping files that you
 *   include with your release.  For example, on Windows, the convention
 *   is to use the code page number to construct the map name, as in
 *   CP1252 or CP1250.
 */
/* Generate the name of the character set mapping table for Unicode
 * characters to and from the given local character set.
 *
 * We use UTF-8 for everything, which should work on all platforms.
 */
void
os_get_charmap( char* mapname, int charmap_id )
{
	XTOSIFC_TRACE_ENTRY(@"os_get_charmap");

	// The same map name is reported regardless of 'charmap_id'.
	// The terminating NUL is part of the array and is copied along with the text.
	static const char utf8MapName[] = "utf-8";
	memcpy(mapname, utf8MapName, sizeof(utf8MapName));
}

/*
 *   Receive notification that a character mapping file has been loaded.
 *   The caller doesn't require this routine to do anything at all; this
 *   is purely for the system-dependent code's use so that it can take
 *   care of any initialization that it must do after the caller has
 *   loaded a character mapping file.  'id' is the character set ID, and
 *   'ldesc' is the display name of the character set.  'sysinfo' is the
 *   extra system information string that is stored in the mapping file;
 *   the interpretation of this information is up to this routine.
 *
 *   For reference, the Windows version uses the extra information as a
 *   code page identifier, and chooses its default font character set to
 *   match the code page.  On DOS, the run-time requires the player to
 *   activate an appropriate code page using a DOS command (MODE CON CP
 *   SELECT) prior to starting the run-time, so this routine doesn't do
 *   anything at all on DOS.
 */
void
os_advise_load_charmap( char* id, char* ldes, char* sysinfo)
{
	// Only the character set ID is used; 'ldes' and 'sysinfo' are ignored.
	// The ID is passed to the game runner as its TADS 2 internal character set.
	NSString *internalCharSetId = [[NSString alloc] initWithUTF8String:id];
	[getGameRunner() setTads2InternalCharSet:internalCharSetId];
}

/*
 *   Translate a character from the HTML 4 Unicode character set to the
 *   current character set used for display.  Takes an HTML 4 character
 *   code and returns the appropriate local character code.
 *
 *   The result buffer should be filled in with a null-terminated string
 *   that should be used to represent the character.  Multi-character
 *   results are possible, which may be useful for certain approximations
 *   (such as using "(c)" for the copyright symbol).
 *
 *   Note that we only define this prototype if this symbol isn't already
 *   defined as a macro, which may be the case on some platforms.
 *   Alternatively, if the function is already defined (for example, as an
 *   inline function), the defining code can define OS_XLAT_HTML4_DEFINED,
 *   in which case we'll also omit this prototype.
 *
 *   Important: this routine provides the *default* mapping that is used
 *   when no external character mapping file is present, and for any named
 *   entities not defined in the mapping file.  Any entities in the
 *   mapping file, if used, will override this routine.
 *
 *   A trivial implementation of this routine (that simply returns a
 *   one-character result consisting of the original input character,
 *   truncated to eight bits if necessary) can be used if you want to
 *   require an external mapping file to be used for any game that
 *   includes HTML character entities.  The DOS version implements this
 *   routine so that games will still look reasonable when played with no
 *   mapping file present, but other systems are not required to do this.
 */
// Only used for T2 transcripts.
//
// Writes a null-terminated representation of 'html4_char' into 'result'
// ('result_buf_len' is the buffer's capacity in bytes, terminator included):
//   1. try to encode the character itself via the game runner;
//   2. failing that, try its ASCII look-alike from
//      map_nonlatin1_unicode_char_for_xlat_html4();
//   3. failing that, or if the encoded text does not fit, write "?".
// If even "?" does not fit, 'result' is left as an empty string.
void
os_xlat_html4(unsigned int html4_char, char *result, size_t result_buf_len)
{
	if (result == NULL || result_buf_len == 0) {
		return;
	}
	result[0] = '\0';

	// First attempt: the character as given (narrowed to unichar, as before).
	unichar ch = (unichar)html4_char;
	NSString *s = [[NSString alloc] initWithCharacters:&ch length:1];
	const char *encoded = [getGameRunner() makeCStringQuiet:s];

	if (encoded == NULL) {
		// try ASCII-fying it
		// (assign to the same variables - a fresh inner declaration would shadow
		// them and the fallback result would be lost)
		ch = (unichar)map_nonlatin1_unicode_char_for_xlat_html4(ch);
		s = [[NSString alloc] initWithCharacters:&ch length:1];
		encoded = [getGameRunner() makeCStringQuiet:s];
	}

	if (encoded == NULL || strlen(encoded) >= result_buf_len) {
		// No luck (or it would overflow the caller's buffer), give up
		encoded = "?";
	}

	size_t encodedLen = strlen(encoded);
	if (encodedLen < result_buf_len) {
		memcpy(result, encoded, encodedLen + 1); // + 1 copies the terminator too
	}
}

/*
 *  This is purely to make T2 transcripts look better for Unicode chars that have no ASCII equivalents
 */
unsigned int map_nonlatin1_unicode_char_for_xlat_html4(unsigned int unicode_char)
{
	// Code points 0..255 are never touched.
	if (unicode_char <= 255) {
		return unicode_char;
	}

	switch (unicode_char) {
		case 338:   // Latin capital ligature oe
			return 'O';
		case 339:   // Latin small ligature oe
			return 'o';
		case 352:   // Latin capital letter s with caron
			return 'S';
		case 353:   // Latin small letter s with caron
			return 's';
		case 376:   // Latin capital letter y with diaeresis
			return 'Y';

		case 8211:  // en dash
		case 8212:  // em dash
			return '-';

		case 8216:  // left single quotation mark
		case 8217:  // right single quotation mark
		case 8218:  // single low-9 quotation mark
		case 8249:  // single left-pointing angle quotation mark
		case 8250:  // single right-pointing angle quotation mark
			return '\'';

		case 8220:  // left double quotation mark
		case 8221:  // right double quotation mark
		case 8222:  // double low-9 quotation mark
			return '"';

		// Not mapped (returned unchanged like any other code point):
		//   8224 dagger, 8225 double dagger, 8240 per mille sign, 8482 trademark symbol
		default:
			return unicode_char;
	}
}
