ISBN/ISSN/ISMN/EAN13 module

From: "Jeremy Kronuz" <kronuz(at)hotmail(dot)com>
To: pgsql-hackers(at)postgresql(dot)org
Cc: tgl(at)sss(dot)pgh(dot)pa(dot)us, pgsql-patches(at)postgresql(dot)org
Subject: ISBN/ISSN/ISMN/EAN13 module
Date: 2006-09-09 22:13:51
Message-ID: BAY107-F328919B6E12D3501B66606D8340@phx.gbl
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers pgsql-patches

> >> I'm reviewing this for addition to contrib/ now. I notice that there
>is
> >> no clear license statement. Is it OK to put the following into the
> >> README file?
>
> > Tom, yes. Also, I just put copyright in the files that contain a
>significant
> > amount of code written by me. (i.e. isn.h and isn.c).
>
>Excellent, thanks.
>
> > Also, did you get the 'isn-1_0_beta_20060924' version from my ftp?
>'cause
> > thats the last one I updated (2006-09-24). It's at:
> > ftp://ftp.kronuz.com/pub/programming/isn-1_0_beta_20060924.tar.bz2 (just
>in
> > case)
>
>You had submitted two slightly different versions to the mailing list
>awhile back. I took the one that seemed to have later file dates and
>did some fixes/editorializations on that. Please look at what I've
>just committed to PG CVS and see if you want to make any adjustments
>--- if so, submit a patch through the usual pgsql-patches channel.
>

Tom, I've checked the version in the cvs and I had made significant changes
from that version. I fixed some major bugs that prevented some ISBN numbers
from working. I've merged the changes made for the cvs with my own, hoping I
didn't miss anything that was already fixed in the cvs... I noticed you
changed some log messages and that you added GET_STR and GET_TEXT. indeed
there was that thing that took me some time to figure in both
ean13_cast_to_text() and isn_cast_to_text() functions, not changing to a
valid TEXT the return value; but I did not know about the other
*_cast_from_text() functions having the problem of the missing GET_STR() (I
was using VARDATA()).

As I changed so much things with the patch (I'm sending the patch to
pgsql-patches too), Could you please check that those two functions
(ean13_cast_to_text() and isn_cast_to_text()) to see if the use of
GET_TEXT() would be a better option from the one I'm using? (I hope I left
all the improved messages okay)

Regards,
Kronuz.

Patch follows next:
diff -ruN oldisn/isn.c isn/isn.c
--- oldisn/isn.c 2006-09-08 23:07:52.000000000 -0500
+++ isn/isn.c 2006-09-09 16:58:43.098148200 -0500
@@ -31,7 +31,7 @@

enum isn_type { INVALID, ANY, EAN13, ISBN, ISMN, ISSN, UPC };

-static const char *isn_names[] = { "ISN", "ISN", "EAN13", "ISBN", "ISMN",
"ISSN", "UPC" };
+static const char *isn_names[] = { "EAN13/UPC/ISxN", "EAN13/UPC/ISxN",
"EAN13", "ISBN", "ISMN", "ISSN", "UPC" };

static bool g_weak = false;
static bool g_initialized = false;
@@ -43,11 +43,11 @@

/***********************************************************************
**
- ** Routines for ISNs.
+ ** Routines for EAN13/UPC/ISxNs.
**
** Note:
** In this code, a normalized string is one that is known to be a valid
- ** ISN number containing only digits and hyphens and with enough space
+ ** ISxN number containing only digits and hyphens and with enough space
** to hold the full 13 digits plus the maximum of four hyphens.
***********************************************************************/

@@ -217,7 +217,7 @@
}

/*
- * weight_checkdig -- Receives a buffer with a normalized ISN string
number,
+ * weight_checkdig -- Receives a buffer with a normalized ISxN string
number,
* and the length to weight.
*
* Returns the weight of the number (the check digit value, 0-10)
@@ -239,7 +239,7 @@

/*
- * checkdig --- Receives a buffer with a normalized ISN string number,
+ * checkdig --- Receives a buffer with a normalized ISxN string number,
* and the length to check.
*
* Returns the check digit value (0-9)
@@ -267,8 +267,94 @@
}

/*
- * ean2isn --- Convert in-place a normalized EAN13 string to the
corresponding
- * ISN string number. Assumes the input string is normalized.
+ * ean2isn --- Try to convert an ean13 number to a UPC/ISxN number.
+ * This doesn't verify for a valid check digit.
+ *
+ * If errorOK is false, ereport a useful error message if the ean13 is bad.
+ * If errorOK is true, just return "false" for bad input.
+ */
+static
+bool ean2isn(ean13 ean, bool errorOK, ean13 *result, enum isn_type accept)
+{
+ enum isn_type type = INVALID;
+
+ char buf[MAXEAN13LEN + 1];
+ char *firstdig, *aux;
+ unsigned digval;
+ unsigned search;
+ ean13 ret = ean;
+
+ ean >>= 1;
+ /* verify it's in the EAN13 range */
+ if(ean > UINT64CONST(9999999999999))
+ goto eantoobig;
+
+ /* convert the number */
+ search = 0;
+ firstdig = aux = buf + 13;
+ *aux = '\0'; /* terminate string; aux points to last digit */
+ do {
+ digval = (unsigned)(ean % 10); /* get the decimal value */
+ ean /= 10; /* get next digit */
+ *--aux = (char)(digval + '0'); /* convert to ascii and store */
+ } while(ean && search++<12);
+ while(search++<12) *--aux = '0'; /* fill the remaining EAN13 with '0' */
+
+ /* find out the data type: */
+ if(!strncmp("978", buf, 3)) { /* ISBN */
+ type = ISBN;
+ } else if(!strncmp("977", buf, 3)) { /* ISSN */
+ type = ISSN;
+ } else if(!strncmp("9790", buf, 4)) { /* ISMN */
+ type = ISMN;
+ } else if(!strncmp("979", buf, 3)) { /* ISBN-13 */
+ type = ISBN;
+ } else if(*buf == '0') { /* UPC */
+ type = UPC;
+ } else {
+ type = EAN13;
+ }
+ if(accept != ANY && accept != EAN13 && accept != type) goto eanwrongtype;
+
+ *result = ret;
+ return true;
+
+eanwrongtype:
+ if(!errorOK) {
+ if(type!=EAN13) {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("cannot cast EAN13(%s) to %s for number: \"%s\"",
+ isn_names[type], isn_names[accept], buf)));
+ } else {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("cannot cast %s to %s for number: \"%s\"",
+ isn_names[type], isn_names[accept], buf)));
+ }
+ }
+ return false;
+
+eantoobig:
+ if(!errorOK) {
+ char eanbuf[64];
+
+ /*
+ * Format the number separately to keep the machine-dependent
+ * format code out of the translatable message text
+ */
+ snprintf(eanbuf, sizeof(eanbuf), EAN13_FORMAT, ean);
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value \"%s\" is out of range for %s type",
+ eanbuf, isn_names[type])));
+ }
+ return false;
+}
+
+/*
+ * ean2UPC/ISxN --- Convert in-place a normalized EAN13 string to the
corresponding
+ * UPC/ISxN string number. Assumes the input string is
normalized.
*/
static inline
void ean2ISBN(char *isn)
@@ -325,7 +411,8 @@
{
ean13 ean = 0; /* current ean */
while(*num) {
- ean = 10 * ean + ((*num++) - '0');
+ if(isdigit(*num)) ean = 10 * ean + (*num - '0');
+ num++;
}
return (ean<<1); /* also give room to a flag */
}
@@ -336,7 +423,7 @@
* the string (maximum MAXEAN13LEN+1 bytes)
* This doesn't verify for a valid check digit.
*
- * If shortType is true, the returned string is in the old ISN short
format.
+ * If shortType is true, the returned string is in the old ISxN short
format.
* If errorOK is false, ereport a useful error message if the string is
bad.
* If errorOK is true, just return "false" for bad input.
*/
@@ -369,8 +456,8 @@
digval = (unsigned)(ean % 10); /* get the decimal value */
ean /= 10; /* get next digit */
*--aux = (char)(digval + '0'); /* convert to ascii and store */
- if(++search == 1) *--aux = '-'; /* the check digit is always there */
- } while(ean);
+ if(search == 0) *--aux = '-'; /* the check digit is always there */
+ } while(ean && search++<13);
while(search++<13) *--aux = '0'; /* fill the remaining EAN13 with '0' */

/* The string should be in this form: ???DDDDDDDDDDDD-D" */
@@ -409,7 +496,7 @@
TABLE_index = NULL;
}

- /* verify it's a logically valid EAN13/ISN */
+ /* verify it's a logically valid EAN13/UPC/ISxN */
digval = search;
search = hyphenate(result+digval, result+digval+2, TABLE, TABLE_index);

@@ -452,8 +539,8 @@
snprintf(eanbuf, sizeof(eanbuf), EAN13_FORMAT, ean);
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
- errmsg("value \"%s\" is out of range for ISN type",
- eanbuf)));
+ errmsg("value \"%s\" is out of range for %s type",
+ eanbuf, isn_names[type])));
}
return false;
}
@@ -483,7 +570,7 @@
/* recognize and validate the number: */
while(*aux2 && length <= 13) {
last = (*(aux2+1) == '!' || *(aux2+1) == '\0'); /* is the last character
*/
- digit = isdigit(*aux2); /* is current character a digit? */
+ digit = (isdigit(*aux2)!=0); /* is current character a digit? */
if(*aux2=='?' && last) /* automagically calculate check digit if it's '?'
*/
magic = digit = true;
if(length == 0 && (*aux2=='M' || *aux2=='m')) {
@@ -583,19 +670,25 @@
break;
}

- if(!valid && !magic) goto eanbadcheck;
-
+ /* fix the check digit: */
for(aux1 = buf; *aux1 && *aux1 <= ' '; aux1++);
aux1[12] = checkdig(aux1, 13) + '0';
aux1[13] = '\0';

+ if(!valid && !magic) goto eanbadcheck;
+
*result = str2ean(aux1);
*result |= valid?0:1;
-
return true;

eanbadcheck:
- if(!g_weak) {
+ if(g_weak) { /* weak input mode is activated: */
+ /* set the "invalid-check-digit-on-input" flag */
+ *result = str2ean(aux1);
+ *result |= 1;
+ return true;
+ }
+
if(!errorOK) {
if(rcheck == (unsigned)-1) {
ereport(ERROR,
@@ -610,30 +703,6 @@
}
}
return false;
- }
-
- if(accept != EAN13 && accept != ANY && type != accept) goto eanwrongtype;
-
- /* fix the check digit: */
- for(aux1 = buf; *aux1 && *aux1 <= ' '; aux1++);
- aux1[12] = checkdig(aux1, 13) + '0';
- aux1[13] = '\0';
- *result = str2ean(aux1);
-
- /* set the "invalid-check-digit-on-input" flag */
- *result |= 1;
-
- /* just warn about the error when there was a real check digit error: */
- if(check != rcheck) {
- if(rcheck == (unsigned)-1) {
- elog(WARNING, "invalid %s number: \"%s\"",
- isn_names[accept], str);
- } else {
- elog(WARNING, "invalid check digit for %s number: \"%s\", should be %c",
- isn_names[accept], str, (rcheck==10)?('X'):(rcheck+'0'));
- }
- }
- return true;

eaninvalid:
if(!errorOK)
@@ -647,8 +716,8 @@
if(!errorOK)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid %s type for number: \"%s\"",
- isn_names[accept], str)));
+ errmsg("cannot cast %s to %s for number: \"%s\"",
+ isn_names[type], isn_names[accept], str)));
return false;

eantoobig:
@@ -784,26 +853,97 @@
Datum
ean13_cast_to_text(PG_FUNCTION_ARGS)
{
- ean13 val = PG_GETARG_EAN13(0);
- char buf[MAXEAN13LEN + 1];
+ /* arg is ean13, but easier to leave it as Datum */
+ Datum arg = PG_GETARG_DATUM(0);
+ char *s;
+ int len;
+ text *result;

- (void) ean2string(val, false, buf, false);
+ s = DatumGetCString(DirectFunctionCall1(ean13_out, arg));
+ len = strlen(s);
+
+ result = (text *) palloc(VARHDRSZ + len);

- PG_RETURN_TEXT_P(GET_TEXT(buf));
+ VARATT_SIZEP(result) = len + VARHDRSZ;
+ memcpy(VARDATA(result), s, len);
+
+ pfree(s);
+
+ PG_RETURN_TEXT_P(result);
}

PG_FUNCTION_INFO_V1(isn_cast_to_text);
Datum
isn_cast_to_text(PG_FUNCTION_ARGS)
{
+ /* arg is ean13, but easier to leave it as Datum */
+ Datum arg = PG_GETARG_DATUM(0);
+ char *s;
+ int len;
+ text *result;
+
+ s = DatumGetCString(DirectFunctionCall1(isn_out, arg));
+ len = strlen(s);
+
+ result = (text *) palloc(VARHDRSZ + len);
+
+ VARATT_SIZEP(result) = len + VARHDRSZ;
+ memcpy(VARDATA(result), s, len);
+
+ pfree(s);
+
+ PG_RETURN_TEXT_P(result);
+}
+
+PG_FUNCTION_INFO_V1(isbn_cast_from_ean13);
+Datum
+isbn_cast_from_ean13(PG_FUNCTION_ARGS)
+{
ean13 val = PG_GETARG_EAN13(0);
- char buf[MAXEAN13LEN + 1];
+ ean13 result;

- (void) ean2string(val, false, buf, true);
+ (void) ean2isn(val, false, &result, ISBN);
+
+ PG_RETURN_EAN13(result);
+}
+
+PG_FUNCTION_INFO_V1(ismn_cast_from_ean13);
+Datum
+ismn_cast_from_ean13(PG_FUNCTION_ARGS)
+{
+ ean13 val = PG_GETARG_EAN13(0);
+ ean13 result;

- PG_RETURN_TEXT_P(GET_TEXT(buf));
+ (void) ean2isn(val, false, &result, ISMN);
+
+ PG_RETURN_EAN13(result);
}

+PG_FUNCTION_INFO_V1(issn_cast_from_ean13);
+Datum
+issn_cast_from_ean13(PG_FUNCTION_ARGS)
+{
+ ean13 val = PG_GETARG_EAN13(0);
+ ean13 result;
+
+ (void) ean2isn(val, false, &result, ISSN);
+
+ PG_RETURN_EAN13(result);
+}
+
+PG_FUNCTION_INFO_V1(upc_cast_from_ean13);
+Datum
+upc_cast_from_ean13(PG_FUNCTION_ARGS)
+{
+ ean13 val = PG_GETARG_EAN13(0);
+ ean13 result;
+
+ (void) ean2isn(val, false, &result, UPC);
+
+ PG_RETURN_EAN13(result);
+}
+
+
PG_FUNCTION_INFO_V1(ean13_cast_from_text);
Datum
ean13_cast_from_text(PG_FUNCTION_ARGS)
diff -ruN oldisn/isn.h isn/isn.h
--- oldisn/isn.h 2006-09-08 23:07:52.000000000 -0500
+++ isn/isn.h 2006-09-09 16:35:49.486379700 -0500
@@ -27,8 +27,8 @@

#define EAN13_FORMAT UINT64_FORMAT

-#define PG_GETARG_EAN13(n) PG_GETARG_INT64(n)
-#define PG_RETURN_EAN13(x) PG_RETURN_INT64(x)
+#define PG_GETARG_EAN13(n) PG_GETARG_INT64((int64)n)
+#define PG_RETURN_EAN13(x) PG_RETURN_INT64((int64)x)

extern Datum isn_out(PG_FUNCTION_ARGS);
extern Datum ean13_out(PG_FUNCTION_ARGS);
@@ -45,6 +45,10 @@
extern Datum ismn_cast_from_text(PG_FUNCTION_ARGS);
extern Datum issn_cast_from_text(PG_FUNCTION_ARGS);
extern Datum upc_cast_from_text(PG_FUNCTION_ARGS);
+extern Datum isbn_cast_from_ean13(PG_FUNCTION_ARGS);
+extern Datum ismn_cast_from_ean13(PG_FUNCTION_ARGS);
+extern Datum issn_cast_from_ean13(PG_FUNCTION_ARGS);
+extern Datum upc_cast_from_ean13(PG_FUNCTION_ARGS);

extern Datum is_valid(PG_FUNCTION_ARGS);
extern Datum make_valid(PG_FUNCTION_ARGS);
diff -ruN oldisn/isn.sql.in isn/isn.sql.in
--- oldisn/isn.sql.in 2006-09-08 23:07:52.000000000 -0500
+++ isn/isn.sql.in 2006-09-09 16:03:53.771250000 -0500
@@ -2131,86 +2131,111 @@
-- Type casts:
--
---------------------------------------------------
+CREATE FUNCTION isbn13(ean13)
+RETURNS isbn13
+AS 'MODULE_PATHNAME', 'isbn_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+CREATE FUNCTION ismn13(ean13)
+RETURNS ismn13
+AS 'MODULE_PATHNAME', 'ismn_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+CREATE FUNCTION issn13(ean13)
+RETURNS issn13
+AS 'MODULE_PATHNAME', 'issn_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+CREATE FUNCTION isbn(ean13)
+RETURNS isbn
+AS 'MODULE_PATHNAME', 'isbn_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+CREATE FUNCTION ismn(ean13)
+RETURNS ismn
+AS 'MODULE_PATHNAME', 'ismn_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+CREATE FUNCTION issn(ean13)
+RETURNS issn
+AS 'MODULE_PATHNAME', 'issn_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+CREATE FUNCTION upc(ean13)
+RETURNS upc
+AS 'MODULE_PATHNAME', 'upc_cast_from_ean13'
+LANGUAGE 'C' IMMUTABLE STRICT;
+
+
CREATE FUNCTION ean13(text)
RETURNS ean13
AS 'MODULE_PATHNAME', 'ean13_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION isbn13(text)
RETURNS isbn13
AS 'MODULE_PATHNAME', 'isbn_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION ismn13(text)
RETURNS ismn13
AS 'MODULE_PATHNAME', 'ismn_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION issn13(text)
RETURNS issn13
AS 'MODULE_PATHNAME', 'issn_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION isbn(text)
RETURNS isbn
AS 'MODULE_PATHNAME', 'isbn_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION ismn(text)
RETURNS ismn
AS 'MODULE_PATHNAME', 'ismn_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION issn(text)
RETURNS issn
AS 'MODULE_PATHNAME', 'issn_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION upc(text)
RETURNS upc
AS 'MODULE_PATHNAME', 'upc_cast_from_text'
LANGUAGE 'C' IMMUTABLE STRICT;

+
CREATE FUNCTION text(ean13)
RETURNS text
AS 'MODULE_PATHNAME', 'ean13_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(isbn13)
RETURNS text
AS 'MODULE_PATHNAME', 'ean13_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(ismn13)
RETURNS text
AS 'MODULE_PATHNAME', 'ean13_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(issn13)
RETURNS text
AS 'MODULE_PATHNAME', 'ean13_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(isbn)
RETURNS text
AS 'MODULE_PATHNAME', 'isn_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(ismn)
RETURNS text
AS 'MODULE_PATHNAME', 'isn_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(issn)
RETURNS text
AS 'MODULE_PATHNAME', 'isn_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;
-
CREATE FUNCTION text(upc)
RETURNS text
AS 'MODULE_PATHNAME', 'isn_cast_to_text'
LANGUAGE 'C' IMMUTABLE STRICT;

+CREATE CAST (ean13 AS isbn13) WITH FUNCTION isbn13(ean13);
+CREATE CAST (ean13 AS isbn) WITH FUNCTION isbn(ean13);
+CREATE CAST (ean13 AS ismn13) WITH FUNCTION ismn13(ean13);
+CREATE CAST (ean13 AS ismn) WITH FUNCTION ismn(ean13);
+CREATE CAST (ean13 AS issn13) WITH FUNCTION issn13(ean13);
+CREATE CAST (ean13 AS issn) WITH FUNCTION issn(ean13);
+CREATE CAST (ean13 AS upc) WITH FUNCTION upc(ean13);
+
CREATE CAST (isbn13 AS ean13) WITHOUT FUNCTION AS ASSIGNMENT;
CREATE CAST (isbn AS ean13) WITHOUT FUNCTION AS ASSIGNMENT;
CREATE CAST (ismn13 AS ean13) WITHOUT FUNCTION AS ASSIGNMENT;
@@ -2218,6 +2243,7 @@
CREATE CAST (issn13 AS ean13) WITHOUT FUNCTION AS ASSIGNMENT;
CREATE CAST (issn AS ean13) WITHOUT FUNCTION AS ASSIGNMENT;
CREATE CAST (upc AS ean13) WITHOUT FUNCTION AS ASSIGNMENT;
+
CREATE CAST (isbn AS isbn13) WITHOUT FUNCTION AS ASSIGNMENT;
CREATE CAST (isbn13 AS isbn) WITHOUT FUNCTION AS ASSIGNMENT;
CREATE CAST (ismn AS ismn13) WITHOUT FUNCTION AS ASSIGNMENT;

In response to

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Joshua D. Drake 2006-09-09 22:25:55 Re: Release notes
Previous Message mark 2006-09-09 22:03:17 Re: TODO item: GUID

Browse pgsql-patches by date

  From Date Subject
Next Message Jeremy Kronuz 2006-09-09 22:48:10 Re: ISBN/ISSN/ISMN/EAN13 module
Previous Message Christopher Browne 2006-09-09 10:25:34 Re: [HACKERS] Fix linking of OpenLDAP libraries