Skip site navigation (1) Skip section navigation (2)

Re: [PATCHES] Chinese GB18030 support is implemented!

From: Bill Huang <bill_huanghb(at)ybb(dot)ne(dot)jp>
To: Bill Huang <bill_huanghb(at)ybb(dot)ne(dot)jp>
Cc: pgsql-announce(at)postgresql(dot)org, pgsql-patches(at)postgresql(dot)org,Bill Huang <bhuang(at)redhat(dot)com>, Yukihiro Nakai <ynakai(at)redhat(dot)com>
Subject: Re: [PATCHES] Chinese GB18030 support is implemented!
Date: 2002-06-06 07:54:05
Message-ID: 3CFF151D.8030402@ybb.ne.jp (view raw or flat)
Thread:
Lists: pgsql-announce, pgsql-patches
- GB18030 support is also available on the ODBC front-end side.

Best Regards,
Bill


Bill Huang wrote:

>Hello,
>
>As PostgreSQL is widely used around the world, many Chinese users are looking
>forward to using such a high-performance database management
>system. However, since the new Chinese codepage standard GB18030 is not
>completely supported, PostgreSQL's use in China is limited.
>
>Now I have managed to implement GB18030 support on top of the latest
>version, so the following features are available once the patches are applied.
>
>- Chinese GB18030 encoding is available on the front-end side, while on
>the backend side, EUC_CN or MIC is used.
>- Encoding conversion between MIC and GB18030 is implemented.
>- GB18030 locale support is available on the front-end side.
>- A GB18030 locale test is added.
>
>Any help with testing these patches, and any suggestions for GB18030
>support, would be greatly appreciated.
>
>Best Regards,
>Bill
>
>
>------------------------------------------------------------------------
>
>--- postgresql-7.2.1/src/backend/utils/mb/conv.c.org	Thu Jun  6 11:52:24 2002
>+++ postgresql-7.2.1/src/backend/utils/mb/conv.c	Thu Jun  6 12:20:36 2002
>@@ -502,6 +502,96 @@
> }
> 
> /*
>+ * GB18030 ---> MIC
>+ * Added by Bill Huang <bhuang(at)redhat(dot)com>,<bill_huanghb(at)ybb(dot)ne(dot)jp>
>+ */
>+static void
>+gb180302mic(unsigned char *gb18030, unsigned char *p, int len)
>+{
>+	int			c1;
>+	int			c2;
>+
>+	while (len > 0 && (c1 = *gb18030++))
>+	{
>+		if (c1 < 0x80)
>+		{						/* should be ASCII */
>+			len--;
>+			*p++ = c1;
>+		}
>+		else if(c1 >= 0x81 && c1 <= 0xfe)
>+		{
>+			c2 = *gb18030++;
>+			
>+			if(c2 >= 0x30 && c2 <= 0x69){
>+				len -= 4;
>+				*p++ = c1;
>+				*p++ = c2;
>+				*p++ = *gb18030++;
>+				*p++ = *gb18030++;
>+				*p++ = *gb18030++;
>+			}
>+			else if ((c2 >=0x40 && c2 <= 0x7e) ||(c2 >=0x80 && c2 <= 0xfe)){
>+				len -= 2;
>+				*p++ = c1;
>+				*p++ = c2;
>+				*p++ = *gb18030++;
>+			}
>+			else{	/*throw the strange code*/
>+				len--;
>+			}
>+		}
>+	}
>+	*p = '\0';
>+}
>+
>+/*
>+ * MIC ---> GB18030
>+ * Added by Bill Huang <bhuang(at)redhat(dot)com>,<bill_huanghb(at)ybb(dot)ne(dot)jp>
>+ */
>+static void
>+mic2gb18030(unsigned char *mic, unsigned char *p, int len)
>+{
>+	int			c1;
>+	int			c2;
>+
>+	while (len > 0 && (c1 = *mic))
>+	{
>+		len -= pg_mic_mblen(mic++);
>+
>+		if (c1 <= 0x7f) /*ASCII*/
>+		{					
>+			*p++ = c1;
>+		}
>+		else if (c1 >= 0x81 && c1 <= 0xfe)
>+		{		
>+			c2 = *mic++;
>+			
>+			if((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfe)){
>+				*p++ = c1;
>+				*p++ = c2;
>+			}
>+			else if(c2 >= 0x30 && c2 <= 0x39){
>+				*p++ = c1;
>+				*p++ = c2;
>+				*p++ = *mic++;
>+				*p++ = *mic++;
>+			}	
>+			else{
>+				mic--;
>+				printBogusChar(&mic, &p);
>+				mic--;
>+				printBogusChar(&mic, &p);
>+			}		
>+		}
>+		else{
>+			mic--;
>+			printBogusChar(&mic, &p);
>+		}
>+	}
>+	*p = '\0';
>+}
>+
>+/*
>  * EUC_TW ---> MIC
>  */
> static void
>@@ -1583,6 +1673,26 @@
> }
> 
> /*
>+ * UTF-8 ---> GB18030
>+ */
>+static void
>+utf_to_gb18030(unsigned char *utf, unsigned char *euc, int len)
>+
>+{
>+	utf_to_local(utf, euc, ULmapEUC_CN,
>+				 sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local), len);
>+}
>+
>+/*
>+ * GB18030 ---> UTF-8
>+ */
>+static void
>+gb18030_to_utf(unsigned char *euc, unsigned char *utf, int len)
>+{
>+	local_to_utf(euc, utf, LUmapEUC_CN,
>+		  sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf), PG_EUC_CN, len);
>+}
>+/*
>  * UTF-8 ---> EUC_KR
>  */
> static void
>@@ -1754,6 +1864,9 @@
> 		PG_BIG5, big52mic, mic2big5, big5_to_utf, utf_to_big5
> 	},
> 	{
>+		PG_GB18030, gb180302mic, mic2gb18030, gb18030_to_utf, utf_to_gb18030
>+	},
>+	{
> 		PG_WIN1250, win12502mic, mic2win1250, 0, 0
> 	},
> };
>@@ -1841,6 +1954,9 @@
> 		PG_BIG5, big52mic, mic2big5, 0, 0
> 	},
> 	{
>+		PG_GB18030, gb180302mic, mic2gb18030, 0, 0
>+	},
>+	{
> 		PG_WIN1250, win12502mic, mic2win1250, 0, 0
> 	},
> };
>
>
>------------------------------------------------------------------------
>
>--- postgresql-7.2.1/src/backend/utils/mb/encnames.c.org	Mon Jun  3 19:24:10 2002
>+++ postgresql-7.2.1/src/backend/utils/mb/encnames.c	Mon Jun  3 19:25:26 2002
>@@ -173,6 +173,9 @@
> 	{
> 		"windows1251", PG_WIN1251
> 	},							/* Windows-1251; Microsoft */
>+	{
>+		"gb18030", PG_GB18030
>+	},							/* GB18030; GB18030 */
> 
> 	{
> 		NULL, 0
>@@ -268,6 +271,9 @@
> 		"BIG5", PG_BIG5
> 	},
> 	{
>+		"GB18030", PG_GB18030
>+	},
>+	{
> 		"WIN1250", PG_WIN1250
> 	}
> };
>
>
>------------------------------------------------------------------------
>
>--- postgresql-7.2.1/src/interfaces/odbc/multibyte.c.org	Wed Jun  5 18:28:30 2002
>+++ postgresql-7.2.1/src/interfaces/odbc/multibyte.c	Wed Jun  5 19:48:01 2002
>@@ -48,6 +48,28 @@
> 						mb_st = 0;
> 				}
> 				break;
>+				/* Chinese GB18030 support
>+				 * By Bill Huang <bhuang(at)redhat(dot)com>,<bill_huanghb(at)ybb(dot)ne(dot)jp>
>+				 * */
>+			case GB18030:
>+				{
>+					if (mb_st < 2 && s[i] > 0x81)
>+						mb_st = 2;
>+					else if (mb_st == 2)
>+						if(s[i] >= 0x30 && s[i] <= 0x39)
>+							mb_st = 3;
>+						else
>+							mb_st = 1;
>+					else if (mb_st == 3)
>+						if(s[i] >= 0x30 && s[i] <= 0x39)
>+							 mb_st = 1;
>+						else
>+							 mb_st = 3;
>+					else
>+						mb_st = 0;
>+				}
>+				break;
>+				
> 			default:
> 				mb_st = 0;
> 		}
>@@ -87,6 +109,16 @@
> 	{
> 		multibyte_client_encoding = BIG5;
> 		return ("BIG5");
>+	}/* Chinese GB18030 support.
>+	  * Added by Bill Huang  <bhuang(at)redhat(dot)com>,<bill_huanghb(at)ybb(dot)ne(dot)jp> 
>+	  */
>+	if (strstr(str, "%27GB18030%27") ||
>+		strstr(str, "%27gb18030%27") ||
>+		strstr(str, "'GB18030'") ||
>+		strstr(str, "'gb18030'") )
>+	{
>+		multibyte_client_encoding = GB18030;
>+		return ("GB18030");
> 	}
> 	return ("OTHER");
> }
>@@ -127,6 +159,25 @@
> 				else
> 					multibyte_status = 0;
> 			}
>+			break;
>+			/*Chinese GB18030 support.Added by Bill Huang <bhuang(at)redhat(dot)com> <bill_huanghb(at)ybb(dot)ne(dot)jp>*/
>+		case GB18030:
>+			{
>+				if (multibyte_status < 2 && s > 0x80)
>+					multibyte_status = 2;
>+				else if (multibyte_status = 2)
>+					if (s >= 0x30 && s <= 0x39)
>+						multibyte_status = 3;
>+					else
>+						multibyte_status = 1;
>+				else if (multibyte_status = 3)
>+					if (s >= 0x30 && s <= 0x39)
>+						multibyte_status = 1;
>+					else
>+						multibyte_status = 3;
>+				else
>+					multibyte_status = 0;
>+			}
> 			break;
> 		default:
> 			multibyte_status = 0;
>
>
>------------------------------------------------------------------------
>
>--- postgresql-7.2.1/src/interfaces/odbc/multibyte.h.org	Wed Jun  5 19:51:20 2002
>+++ postgresql-7.2.1/src/interfaces/odbc/multibyte.h	Wed Jun  5 19:51:35 2002
>@@ -28,6 +28,7 @@
> #define SJIS				32	/* Shift JIS */
> #define BIG5				33	/* Big5 */
> #define WIN1250				34	/* windows-1250 */
>+#define GB18030				35	/* GB18030 */
> 
> extern int	multibyte_client_encoding;	/* Multibyte client encoding. */
> extern int	multibyte_status;	/* Multibyte charcter status. */
>
>
>------------------------------------------------------------------------
>
>--- postgresql-7.2.1/src/include/mb/pg_wchar.h.org	Mon May 27 20:07:58 2002
>+++ postgresql-7.2.1/src/include/mb/pg_wchar.h	Mon May 27 20:08:59 2002
>@@ -182,6 +182,7 @@
> 	/* followings are for client encoding only */
> 	PG_SJIS,					/* Shift JIS */
> 	PG_BIG5,					/* Big5 */
>+	PG_GB18030,					/* GB18030 */
> 	PG_WIN1250,					/* windows-1250 */
> 
> 	_PG_LAST_ENCODING_			/* mark only */
>
>
>------------------------------------------------------------------------
>
>--- postgresql-7.2.1/src/backend/utils/mb/wchar.c.org	Mon May 27 20:02:44 2002
>+++ postgresql-7.2.1/src/backend/utils/mb/wchar.c	Mon May 27 20:03:12 2002
>@@ -457,6 +457,33 @@
> 	return (len);
> }
> 
>+/*
>+ * GB18030
>+ * Added by Bill Huang <bhuang(at)redhat(dot)com>,<bill_huanghb(at)ybb(dot)ne(dot)jp>
>+ */
>+static int
>+pg_gb18030_mblen(const unsigned char *s)
>+{
>+	int			len;
>+
>+	if (*s <= 0x7f)
>+	{							/* kanji? */
>+		len = 1;
>+	}
>+	else 
>+	{							/* should be ASCII */
>+	
>+		if((*(s+1) >0x40 && *(s+1) <= 0x7e) 
>+				|| (*(s+1) >= 0x80 && *(s+1) <= 0xfe))
>+			len = 2;
>+		else if(*(s+1) >0x30 && *(s+1) <= 0x39)
>+			len = 4;
>+		else
>+			len = 2;
>+	}
>+	return (len);
>+}
>+
> pg_wchar_tbl pg_wchar_table[] = {
> 	{pg_ascii2wchar_with_len, pg_ascii_mblen, 1},		/* 0; PG_SQL_ASCII	*/
> 	{pg_eucjp2wchar_with_len, pg_eucjp_mblen, 3},		/* 1; PG_EUC_JP */
>@@ -483,6 +510,7 @@
> 	{pg_latin12wchar_with_len, pg_latin1_mblen, 1},		/* 22; ISO-8859-15 */
> 	{pg_latin12wchar_with_len, pg_latin1_mblen, 1},		/* 23; ISO-8859-16 */
> 	{0, pg_sjis_mblen, 2},		/* 24; PG_SJIS */
>+	{0, pg_gb18030_mblen, 2},	/* 25; PG_GB18030 */
> 	{0, pg_big5_mblen, 2},		/* 25; PG_BIG5 */
> 	{pg_latin12wchar_with_len, pg_latin1_mblen, 1}		/* 26; PG_WIN1250 */
> };
>
>
>------------------------------------------------------------------------
>
>
>---------------------------(end of broadcast)---------------------------
>TIP 2: you can get off all lists at once with the unregister command
>    (send "unregister YourEmailAddressHere" to majordomo(at)postgresql(dot)org)
>
> postgresql-7.2.1-conv-gb18030.patch
>
> Content-Type:
>
> text/plain
> Content-Encoding:
>
> base64
>
>
> ------------------------------------------------------------------------
> postgresql-7.2.1-encnames-gb18030.patch
>
> Content-Type:
>
> text/plain
> Content-Encoding:
>
> base64
>
>
> ------------------------------------------------------------------------
> postgresql-7.2.1-multibyte-gb18030.patch
>
> Content-Type:
>
> text/plain
> Content-Encoding:
>
> base64
>
>
> ------------------------------------------------------------------------
> postgresql-7.2.1-multibyteh-gb18030.patch
>
> Content-Type:
>
> text/plain
> Content-Encoding:
>
> base64
>
>
> ------------------------------------------------------------------------
> postgresql-7.2.1-pg-wchar-gb18030.patch
>
> Content-Type:
>
> text/plain
> Content-Encoding:
>
> base64
>
>
> ------------------------------------------------------------------------
> postgresql-7.2.1-wchar-gb18030.patch
>
> Content-Type:
>
> text/plain
> Content-Encoding:
>
> base64
>
>
> ------------------------------------------------------------------------
> Part 1.8
>
> Content-Type:
>
> text/plain
> Content-Encoding:
>
> 8bit
>
>


-- 
/---------------------------/
黄 宏彬 (Bill Huang)
E-mail:bill_huanghb(at)ybb(dot)ne(dot)jp
Cell phone:090-9979-4631
/---------------------------/




In response to

pgsql-announce by date

Next: From: webmaster, Date: 2002-06-06 16:29:15
Subject: GBORG NEWS: Welcome Alberto Magno and Ulrich Sprick!! (npgsql)
Previous: From: Bill Huang, Date: 2002-06-06 07:10:25
Subject: Chinese GB18030 support is implemented!

pgsql-patches by date

Next: From: Mike Wyer, Date: 2002-06-06 21:33:14
Subject: macaddr format fix
Previous: From: Dave Page, Date: 2002-06-06 07:23:22
Subject: FW: Patch for current_schemas to optionally include implicit schemas

Privacy Policy | About PostgreSQL
Copyright © 1996-2014 The PostgreSQL Global Development Group