#!/usr/bin/perl

#
# Convert font from JISX0208.1983 to Unicode encoding
# May 2004 --pkv
# See http://www.tzone.org/~vandry/howto/bigger-fonts-in-xterm.html
#

use strict;

###
### Build a mapping from JIS codes to Unicode using characters not
### found in Kanjidic
###

# Hand-maintained part of the JIS X 0208 -> Unicode table: punctuation,
# symbols, the two Cyrillic IO letters and the box drawing characters.
# Keys are JIS codes and values are Unicode code points, both written in
# hex.  Perl stores the numeric keys as decimal strings, which is what
# allows the decimal number on a font's ENCODING line to be used directly
# as a lookup key later in this script.
my $jis_to_unicode = {
	0x2121 => 0x3000,	# wide space
	0x2122 => 0x3001,	# japanese comma
	0x2123 => 0x3002,	# japanese dot
	0x2124 => 0xff0c,	# wide comma
	0x2125 => 0xff0e,	# wide dot
	0x2126 => 0x30fb,	# wide center dot
	0x2127 => 0xff1a,	# wide colon
	0x2128 => 0xff1b,	# wide semicolon
	0x2129 => 0xff1f,	# wide question mark
	0x212a => 0xff01,	# wide bang
	0x212b => 0x309b,	# voice diacritic
	0x212c => 0x309c,	# circle diacritic
	0x212d => 0x00b4,	# acute accent
	0x212e => 0xff40,	# grave accent
	0x212f => 0x00a8,	# umlaut
	0x2130 => 0xff3e,	# circumflex
	0x2131 => 0xffe3,	# overline
	0x2132 => 0xff3f,	# underline
	0x2133 => 0x30fd,	# repetition mark in katakana
	0x2134 => 0x30fe,	# voiced repetition mark in katakana
	0x2135 => 0x309d,	# repetition mark in hiragana
	0x2136 => 0x309e,	# voiced repetition mark in hiragana
	0x2137 => 0x3003,	# ditto mark
	0x2138 => 0x4edd,	# "as above" mark
	0x2139 => 0x3005,	# repetition of kanji
	0x213a => 0x3006,	# end or closure mark
	0x213b => 0x3007,	# circle
	0x213c => 0x30fc,	# long sound
	0x213d => 0x2015,	# horizontal bar
	0x213e => 0x2010,	# short horizontal bar
	0x213f => 0xff0f,	# wide slash
	0x2140 => 0xff3c,	# wide backslash
	0x2141 => 0x301c,	# wide tilde
	0x2142 => 0x2016,	# double vertical bar
	0x2143 => 0xff5c,	# single vertical bar
	0x2144 => 0x2026,	# three dots
	0x2145 => 0x2025,	# two dots
	0x2146 => 0x2018,	# open single quote
	0x2147 => 0x2019,	# close single quote
	0x2148 => 0x201c,	# open double quote
	0x2149 => 0x201d,	# close double quote
	0x214a => 0xff08,	# wide left parenthesis
	0x214b => 0xff09,	# wide right parenthesis
	0x214c => 0x3014,	# wide left bent square bracket
	0x214d => 0x3015,	# wide right bent square bracket
	0x214e => 0xff3b,	# wide left square bracket
	0x214f => 0xff3d,	# wide right square bracket
	0x2150 => 0xff5b,	# wide left curly bracket
	0x2151 => 0xff5d,	# wide right curly bracket
	0x2152 => 0x3008,	# wide less than
	0x2153 => 0x3009,	# wide greater than
	0x2154 => 0x300a,	# wide much less than
	0x2155 => 0x300b,	# wide much greater than
	0x2156 => 0x300c,	# japanese open single quote
	0x2157 => 0x300d,	# japanese close single quote
	0x2158 => 0x300e,	# japanese open double quote
	0x2159 => 0x300f,	# japanese close double quote
	0x215a => 0x3010,	# bold left square bracket
	0x215b => 0x3011,	# bold right square bracket
	0x215c => 0xff0b,	# wide plus
	0x215d => 0x2212,	# wide minus
	0x215e => 0x00b1,	# plus or minus
	0x215f => 0x00d7,	# cross multiplication
	0x2160 => 0x00f7,	# divide
	0x2161 => 0xff1d,	# wide equals
	0x2162 => 0x2260,	# wide not equal
	0x2163 => 0xff1c,	# wide smashed less than
	0x2164 => 0xff1e,	# wide smashed greater than
	0x2165 => 0x2266,	# less than or equal to
	0x2166 => 0x2267,	# greater than or equal to
	0x2167 => 0x221e,	# infinity
	0x2168 => 0x2234,	# therefore
	0x2169 => 0x2642,	# male
	0x216a => 0x2640,	# female
	0x216b => 0x00b0,	# degree
	0x216c => 0x2032,	# minute
	0x216d => 0x2033,	# second
	0x216e => 0x2103,	# degrees celsius
	0x216f => 0xffe5,	# Yen
	0x2170 => 0xff04,	# wide dollar
	0x2171 => 0x00a2,	# cents
	0x2172 => 0x00a3,	# pound
	0x2173 => 0xff05,	# wide percent
	0x2174 => 0xff03,	# wide hash
	0x2175 => 0xff06,	# wide ampersand
	0x2176 => 0xff0a,	# wide star
	0x2177 => 0xff20,	# wide @
	0x2178 => 0x00a7,	# section mark
	0x2179 => 0x2606,	# open star
	0x217a => 0x2605,	# closed star
	0x217b => 0x25cb,	# open circle
	0x217c => 0x25cf,	# closed circle
	0x217d => 0x25ce,	# double circle
	0x217e => 0x25c7,	# open rotated square
	0x2221 => 0x25c6,	# closed rotated square
	0x2222 => 0x25a1,	# open box
	0x2223 => 0x25a0,	# closed box
	0x2224 => 0x25b3,	# open triangle
	0x2225 => 0x25b2,	# closed triangle
	0x2226 => 0x25bd,	# open upside down triangle
	0x2227 => 0x25bc,	# closed upside down triangle
	0x2228 => 0x203b,	# fancy thing
	0x2229 => 0x3012,	# post
	0x222a => 0x2192,	# right arrow
	0x222b => 0x2190,	# left arrow
	0x222c => 0x2191,	# up arrow
	0x222d => 0x2193,	# down arrow
	0x222e => 0x3013,	# thick equals
	0x223a => 0x2208,	# element of
	0x223b => 0x220b,	# backwards element of
	0x223c => 0x2286,	# subset of
	0x223d => 0x2287,	# superset of
	0x223e => 0x2282,	# proper subset of
	0x223f => 0x2283,	# proper superset of
	0x2240 => 0x222a,	# union
	0x2241 => 0x2229,	# intersection
	0x224a => 0x2227,	# hat
	0x224b => 0x2228,	# vee
	0x224c => 0x00ac,	# nook
	0x224d => 0x21d2,	# implies
	0x224e => 0x21d4,	# iff
	0x224f => 0x2200,	# forall
	0x2250 => 0x2203,	# exists
	0x225c => 0x2220,	# angle
	0x225d => 0x22a5,	# perpendicular
	0x225e => 0x2312,	# top parenthesis
	0x225f => 0x2202,	# partial differentiation
	0x2260 => 0x2207,	# nabla (del)
	0x2261 => 0x2261,	# definition
	0x2262 => 0x2252,	# funky equals
	0x2263 => 0x226a,	# tight much less than
	0x2264 => 0x226b,	# tight much greater than
	0x2265 => 0x221a,	# root
	0x2266 => 0x223d,	# chain link
	0x2267 => 0x221d,	# open infinity
	0x2268 => 0x2235,	# upside down therefore
	0x2269 => 0x222b,	# integral
	0x226a => 0x222c,	# double integral
	0x2272 => 0x212b,	# capital A with circle above
	0x2273 => 0x2030,	# perthousand
	0x2274 => 0x266f,	# sharp
	0x2275 => 0x266d,	# flat
	0x2276 => 0x266a,	# note
	0x2277 => 0x2020,	# dagger
	0x2278 => 0x2021,	# double dagger
	0x2279 => 0x00b6,	# paragraph
	0x227e => 0x25ef,	# big open circle
	0x2727 => 0x0401,	# capital IO
	0x2757 => 0x0451,	# lowercase io
	0x2821 => 0x2500,	# box drawing: -
	0x2822 => 0x2502,	# box drawing: |
	0x2823 => 0x250c,	# box drawing: /-
	0x2824 => 0x2510,	# box drawing: -\
	0x2825 => 0x2518,	# box drawing: -/
	0x2826 => 0x2514,	# box drawing: \-
	0x2827 => 0x251c,	# box drawing: |-
	0x2828 => 0x252c,	# box drawing: T
	0x2829 => 0x2524,	# box drawing: -|
	0x282a => 0x2534,	# box drawing: _|_
	0x282b => 0x253c,	# box drawing: +
	0x282c => 0x2501,	# box drawing: bold -
	0x282d => 0x2503,	# box drawing: bold |
	0x282e => 0x250f,	# box drawing: bold /-
	0x282f => 0x2513,	# box drawing: bold -\
	0x2830 => 0x251b,	# box drawing: bold -/
	0x2831 => 0x2517,	# box drawing: bold \-
	0x2832 => 0x2523,	# box drawing: bold |-
	0x2833 => 0x2533,	# box drawing: bold T
	0x2834 => 0x252b,	# box drawing: bold -|
	0x2835 => 0x253b,	# box drawing: bold _|_
	0x2836 => 0x254b,	# box drawing: bold +
	0x2837 => 0x2520,	# box drawing: bold left |-
	0x2838 => 0x252f,	# box drawing: bold top T
	0x2839 => 0x2528,	# box drawing: bold right -|
	0x283a => 0x2537,	# box drawing: bold bottom _|_
	0x283b => 0x253f,	# box drawing: bold h +
	0x283c => 0x251d,	# box drawing: bold right |-
	0x283d => 0x2530,	# box drawing: bold bottom T
	0x283e => 0x2525,	# box drawing: bold left -|
	0x283f => 0x2538,	# box drawing: bold top _|_
	0x2840 => 0x2542,	# box drawing: bold v +
};

# Stretches of the JIS table that map onto Unicode with a constant offset.
# Each entry is [ first JIS code, first Unicode code point, run length ].
# The Greek and Cyrillic alphabets are split in two because the Unicode
# side has a code point in the middle that the JIS side does not (the
# slot after rho / the final sigma for Greek, the IO letters for Cyrillic,
# which are listed individually in the main table).
for my $run (
	[0x2330, 0xff10, 10],	# wide ASCII digits
	[0x2341, 0xff21, 26],	# wide ASCII capitals
	[0x2361, 0xff41, 26],	# wide ASCII lowercase
	[0x2421, 0x3041, 83],	# Hiragana
	[0x2521, 0x30a1, 86],	# Katakana
	[0x2621, 0x0391, 17],	# Greek capitals, alpha to rho
	[0x2641, 0x03b1, 17],	# Greek lowercase, alpha to rho
	[0x2632, 0x03a3, 7],	# Greek capitals, sigma to omega
	[0x2652, 0x03c3, 7],	# Greek lowercase, sigma to omega
	[0x2721, 0x0410, 6],	# Cyrillic capitals before IO
	[0x2751, 0x0430, 6],	# Cyrillic lowercase before io
	[0x2728, 0x0416, 26],	# Cyrillic capitals after IO
	[0x2758, 0x0436, 26],	# Cyrillic lowercase after io
) {
	my ($jis_first, $ucs_first, $length) = @$run;
	$jis_to_unicode->{$jis_first + $_} = $ucs_first + $_
		for 0 .. $length - 1;
}

###
### Supplement the mapping using Kanjidic
###

my $KANJIDIC = "/usr/share/edict/kanjidic";

open(K, "<", $KANJIDIC) || die "cannot open kanjidic: $!";

# Fork a child whose stdout is connected to PIPE.  The child feeds the
# dictionary through cut (keep only the character and its JIS code, the
# first two space separated fields) and iconv (EUC-JP to UCS-4), so the
# parent receives a stream of 4-byte code units.
my $pid = open(PIPE, "-|");
# A failed fork returns undef, which must not be mistaken for the child's
# return value of 0: otherwise the parent itself would exec the shell.
die "cannot fork: $!" unless defined($pid);
if ($pid == 0) {
	# Child: the dictionary becomes stdin of the shell pipeline.
	open(STDIN, "<&K") || die "cannot redirect stdin: $!";
	close K;
	exec 'sh', '-c', 'cut -d\  -f1,2 | iconv -f EUC-JP -t UCS-4';
	# Only reached when exec failed; never fall through into the
	# parent's code.
	die "cannot exec sh: $!";
}
close K;
binmode(PIPE);

my $buffer;
my $pending = '';	# bytes received that do not yet form whole 4-byte units

my $value;		# JIS code being accumulated from hex digits
my $char;		# code point of the first character on the current line
# Parser state:
#   'c'  waiting for the first character of a line
#   ' '  expecting the space that separates character and JIS code
#   'v'  reading the hex digits of the JIS code
#   'x'  ignoring the rest of the line
my $mode = 'c';

while (1) {
	my $count = read(PIPE, $buffer, 4096);
	die "error reading converted kanjidic: $!" unless defined($count);
	last if $count == 0;

	# A read may end in the middle of a 4-byte unit.  Keep everything in
	# $pending and only decode the leading multiple of four bytes; the
	# remainder stays behind for the next round.
	$pending .= $buffer;
	my $whole = length($pending) - (length($pending) % 4);
	next if $whole == 0;

	# 4-argument substr removes the decoded part from $pending.
	for (unpack("N*", substr($pending, 0, $whole, ''))) {
		if ($mode eq 'c') {
			# Skip blank lines and the second half of CR LF.
			if (($_ != 10) && ($_ != 13)) {
				$char = $_;
				$mode = ' ';
			}
		} elsif ($mode eq ' ') {
			$value = 0;
			if ($_ == 32) {
				$mode = 'v';
			} elsif (($_ == 10) || ($_ == 13)) {
				# One-character line: it is already over, so do
				# not swallow the following line as well.
				$mode = 'c';
			} else {
				$mode = 'x';
			}
		} elsif ($mode eq 'v') {
			if (($_ == 10) || ($_ == 13)) {
				# End of line: the code is complete.
				$jis_to_unicode->{$value} = $char;
				$mode = 'c';
			} elsif (($_ >= 48) && ($_ < 58)) {		# 0-9
				$value <<= 4;
				$value += ($_ - 48);
			} elsif (($_ >= 65) && ($_ < 71)) {		# A-F
				$value <<= 4;
				$value += ($_ - 55);
			} elsif (($_ >= 97) && ($_ < 103)) {	# a-f
				$value <<= 4;
				$value += ($_ - 87);
			} else {
				# Second field is not a hex number; ignore line.
				$mode = 'x';
			}
		} elsif ($mode eq 'x') {
			$mode = 'c' if (($_ == 10) || ($_ == 13));
		}
	}
}
# close on a piped handle also reports the child's exit status; a failed
# conversion leaves the table incomplete, so say so.
close(PIPE) || warn "kanjidic conversion pipeline did not exit cleanly\n";

###
### Read the font from stdin and dump it to stdout translating
### using what we have just built
###

# Copy the BDF font from stdin to stdout line by line.  ENCODING values
# (decimal JIS codes) are replaced by the matching Unicode code point, and
# the charset part of the font name and properties is changed from
# JISX0208.1983-0 to ISO10646-1.  All other lines pass through untouched.
# The charset name is matched without regard to case, and its dot is
# matched literally.
while (my $line = <STDIN>) {
	if ($line =~ /^(ENCODING\s+)(\d+)(\s*)$/s) {
		my ($keyword, $jis, $eol) = ($1, $2, $3);
		if (defined($jis_to_unicode->{$jis})) {
			$line = $keyword . $jis_to_unicode->{$jis} . $eol;
		} else {
			# No mapping known: the glyph keeps its JIS number.
			printf STDERR "unknown JIS character 0x%04x left alone\n", $jis;
		}
	} elsif ($line =~ /^(FONT\s.*)-(?i:JISX0208\.1983)-0(\s*)$/s) {
		$line = $1 . "-ISO10646-1" . $2;
	} elsif ($line =~ /^(CHARSET_REGISTRY\s+")(?i:JISX0208\.1983)("\s*)$/s) {
		$line = $1 . "ISO10646" . $2;
	} elsif ($line =~ /^(CHARSET_ENCODING\s+")0("\s*)$/s) {
		$line = $1 . "1" . $2;
	}
	print $line;
}
