#!/usr/bin/perl
#------------------------------------------------------------------------------
# Project  : LINCAT E-BUSINESS software
# Name     : lincat-indexer.pl
# Language : 5.005_03 built for i386-linux
# OS       : linux RedHat 6.1 kernel 2.2.12-32
# Author   : Gilles Darold, gilles@darold.net
# Copyright: Copyright (c) 1998-2000 : Gilles Darold - All rights reserved -
# Function : Script used to create a text search index.
# Usage    : See documentation.
#------------------------------------------------------------------------------
# Version control :
# $Id$
#------------------------------------------------------------------------------
use strict vars;

# Packages used. 
use DBI ();
use DBD::Pg ();
use Getopt::Std;
use Benchmark;

# Debug flag. No command line option sets it in the code visible here.
my $DEBUG = 0;

# Database connection defaults, overridden by -p and -h.
my $port = '5432';
my $host = 'localhost';

# Values taken from the command line: -d, -t, -f, -l, -s, -i and -u.
my $database = '';
my $table = '';
my $field = '';
my $language = '';
my $stopwords = '';
my $incr = '';
my $user = '';

# Switches: -c (reset search tables), -v (progress dots), -q (quiet).
my $clean = 0;
my $trace = 0;
my $quiet = 0;

# Start time, used for the elapsed time report at the end of the run.
my $t0 = Benchmark->new;

# Print the command line help on standard output, then terminate the
# script with exit status 0. This sub never returns to its caller.
sub usage
{
	my @help = (
		"Usage: $0 [-c -i file -h host -p port] -t table -f field -l language -u user -s file -d dbname\n",
		"\t-c        => drop search tables and indexes, reset all indexing\n",
		"\t-i file   => do incremental index with file\n",
		"\t-v        => print a dot for each word processed\n",
		"\t-h host   => database hostname [default: localhost]\n",
		"\t-p port   => database service port [default: 5432]\n",
		"\t-t table  => table to index (can be t_item,t_information)\n",
		"\t-f field  => field to index (can be s_name_id or s_description_id)\n",
		"\t-l fr     => language to use (can be fr, en, ...)\n",
		"\t-u user   => postgres user who need read grant on search tables\n",
		"\t-s stop   => file with a list of all words not to be indexed\n",
		"\t-d dbname => name of the database\n",
		"\t-q        => quiet mode, do not print anything else than error message\n",
	);

	# One line per option, written in the order listed above
	foreach my $line (@help) {
		print $line;
	}

	exit(0);
}

# Do not bufferize
$| = 1;

#---------- Get command line arguments
# The letters given to getopt() take a value; -c, -v and -q are switches.
my %opts = ();
getopt('dfhilpstu', \%opts);
$clean = 1 if ( exists $opts{c} );
$trace = 1 if ( exists $opts{v} );
$quiet = 1 if ( exists $opts{q} );

# Mandatory options: a missing or empty value prints the help and stops
foreach my $opt (qw(d f l s t u)) {
	usage() if ( !$opts{$opt} );
}
# Optional options: when present they must carry a non empty value
foreach my $opt (qw(i h p)) {
	usage() if ( exists $opts{$opt} && !$opts{$opt} );
}
$database = $opts{d};
$field = $opts{f};
$language = $opts{l};
$stopwords = $opts{s};
$table = $opts{t};
$user = $opts{u};
$incr = $opts{i} if ( exists $opts{i} );
$host = $opts{h} if ( exists $opts{h} );
$port = $opts{p} if ( exists $opts{p} );

#---------- Load the stop words into memory
# One word per line. The whole file is read, blank lines are ignored and a
# trailing carriage return is dropped so DOS formatted files work too.
my %stopper = ();
if ( $stopwords ) {
	my $stopfh;
	unless ( open($stopfh, '<', $stopwords) ) {
		print STDERR "ERROR Lincat-indexer : Can't open file $stopwords, $!\n";
		exit(1);
	}
	while ( my $line = <$stopfh> ) {
		$line =~ s/[\r\n]+$//;
		next if ( $line eq '' );
		$stopper{$line} = 1;
	}
	close($stopfh);
}

#---------- Connect to the database
my $dbsrc = "dbi:Pg:dbname=$database;host=$host;port=$port";
my $dbh = DBI->connect($dbsrc, '', '', {AutoCommit => 0});
if (!$dbh) {
	print STDERR "ERROR Lincat-indexer : ", $DBI::errstr, ".\n";
	exit(1);
}

#### In incremental indexing data are taken from a history file
# else data are all database references
#---------- Open for reading the history file index if provided
my @ref = ();
if ( $incr ) {
	if ( !(-e $incr) ) {
		print STDERR "ERROR : file $incr doesn't exist\n";
		exit(1);
	}
	my $histfh;
	unless ( open($histfh, '<', $incr) ) {
		print STDERR "ERROR Lincat-indexer : Can't open file $incr, $!\n";
		exit(1);
	}
	# One object reference per line, blank lines are ignored
	my @history = ();
	while ( my $line = <$histfh> ) {
		$line =~ s/[\r\n]+$//;
		next if ( $line eq '' );
		push(@history, $line);
	}
	close($histfh);

	foreach my $refobj (@history) {
		# Forget the previous links of this object before indexing it again
		delete_link($dbh, $language, $table, $field, $refobj);
		# Get the object data
		my $array_ref = select_references($dbh, $table, $field, $refobj);
		# Nothing usable comes back when the object no longer exists
		push(@ref, [ @$array_ref ]) if ( ref($array_ref) eq 'ARRAY' );
	}
} else {
	#---------- Get all string references into the table
	@ref = select_references($dbh, $table, $field);
	if ( @ref ) {
		#---------- Initialize database if requested
		init_database($dbh, $language, $user) if ( $clean );
	} else {
		print "Lincat-indexer : nothing to index...\n" if ( !$quiet );
		$dbh->disconnect();
		exit(0);
	}
}

#---------- For each references get the translation
# Each element of @ref is a row: [ s_reference, s_type, <field>, s_ref_user ]
my $nb = 0;	# words added to the dictionary
my $id = 0;	# rank of the object among the processed ones
my $ins = 0;	# word occurrences processed
foreach my $obj (@ref) {
	# Skipped objects do not consume a value of $id
	next if ( ref($obj) ne 'ARRAY' || !$obj->[2] );
	#---------- Get the string
	my @string = select_string($dbh, $language, $obj->[2]);
	foreach my $row (@string) {
		my $text = $row->[0];
		next if ( !defined $text );
		#---------- Suppress HTML code
		$text =~ s/<[^>]*>//gs;
		#---------- Split words
		my @words = split(/[\W\b]/, $text);
		@words = sort @words;
		for my $w (0 .. $#words) {
			# Normalize: lower case, then drop one final 's' and one final 'x'
			my $word = lc($words[$w]);
			$word =~ s/s$//;
			$word =~ s/x$//;
			# NOTE(review): the stop list is compared with the normalized
			# word, so a stop word ending with 's' or 'x' only matches in
			# its shortened form -- confirm this is what is wanted.
			next if ( (length($word) < 2) || $stopper{$word} );
			#---------- Insert this word into the dic table
			my $ref = select_word($dbh, $language, $word);
			# if not exist
			if ( ! $ref ) {
				$nb++;
				# NOTE(review): $id restarts from 0 at each run, so this
				# reference can repeat one created by an earlier run for
				# another word -- confirm whether it has to be unique.
				my $newref = "$field-$id-$w";
				# Insert word into dic
				insert_word($dbh, $language, $newref, $word);
				insert_link($dbh, $language, $table, $field, $newref, $obj->[0], $obj->[1], $obj->[3]);
			} else {
				# Do this link already exist ?
				my $ok = select_link($dbh, $language, $table, $field, $ref, $obj->[0]);
				if ( !$ok ) {
					# Insert word into link
					insert_link($dbh, $language, $table, $field, $ref, $obj->[0], $obj->[1], $obj->[3]);
				}
			}
			$ins++;
			print "." if ( $trace );
		}
	}
	$id++;
}

#---------- Generate search indexes
# Only after a reset: in incremental mode the search tables are not
# recreated by this script, so there is nothing new to index on.
create_index($dbh, $language) if ( $clean && !$incr );

#---------- Disconnect from the database
$dbh->disconnect();

my $t1 = Benchmark->new;
if ( !$quiet ) {
	print "\nIndexing $nb new words with $ins word references tooks : ", timestr(timediff($t1, $t0)), "\n";
	print "Don't forget to manually delete your history file $incr when all indexing are proceed\n" if ( $incr );
	print "Done...\n";
}
#-------------------------------------------------------------------------

# Reset the search tables of one language.
#
# When t_dic_<language> can be queried, the three search indexes and the two
# search tables are dropped, each in its own transaction. A failing drop is
# rolled back and ignored, so a partly built set of objects does not stop the
# reset. Both tables are then created, all rights are revoked from PUBLIC
# and SELECT is granted to $user, in one single transaction.
#
# Parameters:
#   $conn     - connected DBI database handle (AutoCommit off)
#   $language - language code used as suffix of the table names
#   $user     - database user receiving the SELECT grant
#
# Returns nothing useful. When a creation statement fails the statement is
# printed on STDERR, the transaction is rolled back, the connection is
# closed and the script exits with status 1.
sub init_database {
	my ($conn, $language, $user) = @_;

	#------------- Check if we have to destroy all
	my $probe = $conn->prepare("SELECT count(s_reference) FROM t_dic_${language};");
	my $exists = ( $probe && $probe->execute() ) ? 1 : 0;
	$probe->finish if ( $probe );

	if ( !$exists ) {
		# The failed statement must not poison the statements that follow
		$conn->rollback;
	} else {
		#------------- Destroy all indexes on search tables, then the tables
		my @drops = (
			"DROP INDEX \"t_dic_${language}_reference\";",
			"DROP INDEX \"t_dic_${language}_word\";",
			"DROP INDEX \"t_link_dic_${language}_ref_dic\";",
			"DROP TABLE \"t_dic_$language\";",
			"DROP TABLE \"t_link_dic_$language\";",
		);
		foreach my $cmd (@drops) {
			if ( defined $conn->do($cmd) ) {
				$conn->commit;
			} else {
				# Best effort: the object may simply not exist
				$conn->rollback;
			}
		}
	}

	#------------- Create all search table
	my @creates = (
		"CREATE TABLE \"t_dic_$language\" (\"s_reference\" text NOT NULL,\"s_word\" text);",
		"REVOKE ALL on \"t_dic_$language\" from PUBLIC;",
		"GRANT SELECT on \"t_dic_$language\" to \"$user\";",
		"CREATE TABLE \"t_link_dic_$language\" (\"s_ref_dic\" text NOT NULL,\"s_ref_obj\" text NOT NULL,\"s_type\" text,\"s_ref_user\" text,\"s_table\" text NOT NULL,\"s_field\" text NOT NULL);",
		"REVOKE ALL on \"t_link_dic_$language\" from PUBLIC;",
		"GRANT SELECT on \"t_link_dic_$language\" to \"$user\";",
	);
	foreach my $cmd (@creates) {
		next if ( defined $conn->do($cmd) );
		print STDERR "ERROR lincat-indexer : $cmd\n";
		$conn->rollback;
		$conn->disconnect();
		exit(1);
	}
	$conn->commit;

}

# Create the three btree indexes of the search tables of one language:
# s_reference and s_word on t_dic_<language>, s_ref_dic on
# t_link_dic_<language>. All of them are created in a single transaction.
#
# Parameters:
#   $conn     - connected DBI database handle (AutoCommit off)
#   $language - language code used as suffix of the table names
#
# Returns nothing useful. When a statement fails it is printed on STDERR,
# the transaction is rolled back, the connection is closed and the script
# exits with status 1.
sub create_index {
	my ($conn, $language) = @_;

	#------------- Create all index in search tables
	my @creates = (
		"CREATE INDEX \"t_dic_${language}_reference\" on \"t_dic_${language}\" using btree ( \"s_reference\" \"text_ops\" );",
		"CREATE INDEX \"t_dic_${language}_word\" on \"t_dic_${language}\" using btree ( \"s_word\" \"text_ops\" );",
		"CREATE INDEX \"t_link_dic_${language}_ref_dic\" on \"t_link_dic_${language}\" using btree ( \"s_ref_dic\" \"text_ops\" );",
	);
	foreach my $cmd (@creates) {
		next if ( defined $conn->do($cmd) );
		print STDERR "ERROR lincat-indexer : $cmd\n";
		$conn->rollback;
		$conn->disconnect();
		exit(1);
	}

	$conn->commit;

}


# Read s_reference, s_type, the indexed field and s_ref_user from a table.
#
# Parameters:
#   $conn      - connected DBI database handle (AutoCommit off)
#   $tablename - table to read (used as is in the query)
#   $fieldname - column to read as third value (used as is in the query)
#   $refobj    - optional s_reference value; when given and not empty only
#                the matching object is read
#
# Returns:
#   without $refobj : the list of all rows, each one an array reference
#   with $refobj    : an array reference holding the first matching row, or
#                     undef when no row matches
#
# When the query fails it is printed on STDERR, the transaction is rolled
# back, the connection is closed and the script exits with status 1.
sub select_references {
	my ($conn, $tablename, $fieldname, $refobj) = @_;

	# Any non empty value is a filter, "0" included
	my $single = ( defined($refobj) && $refobj ne '' ) ? 1 : 0;

	#------------- Get all references
	my $cmd = "SELECT s_reference,s_type,$fieldname,s_ref_user FROM $tablename";
	my @bind = ();
	if ( $single ) {
		# The value is bound, so quotes inside a reference are harmless
		$cmd .= " WHERE s_reference=?";
		push(@bind, $refobj);
	}
	$cmd .= ";";

	#-------------  Prepare and execute the query
	my $cur = $conn->prepare($cmd);
	if ( !$cur || !$cur->execute(@bind) ) {
		print STDERR "ERROR lincat-indexer : $cmd\n";
		$conn->rollback;
		$conn->disconnect();
		exit(1);
	}

	#-------------  Store all tuples returned
	# Rows are copied because DBI may reuse the array behind fetch()
	my @data = ();
	while (my $array_ref = $cur->fetch) {
		push(@data, [ @$array_ref ]);
		last if ( $single );
	}
	$cur->finish;
	$conn->commit;

	return @data if ( !$single );
	return $data[0];

}

# Read from t_translate the strings stored under one string identifier for
# one language.
#
# Parameters:
#   $conn     - connected DBI database handle (AutoCommit off)
#   $language - value compared with s_ref_language
#   $ref      - value compared with s_id
#
# Returns the list of matching rows, each one an array reference whose only
# element is s_string. The list is empty when nothing matches.
#
# When the query fails it is printed on STDERR, the transaction is rolled
# back, the connection is closed and the script exits with status 1.
sub select_string {
	my ($conn, $language, $ref) = @_;

	#------------- Get all string
	my $cmd = "SELECT s_string FROM t_translate WHERE s_id=? AND s_ref_language=?;";
	# Values are bound, an undefined one being sent as an empty string
	my @bind = map { defined($_) ? $_ : '' } ($ref, $language);

	#-------------  Prepare and execute the query
	my $cur = $conn->prepare($cmd);
	if ( !$cur || !$cur->execute(@bind) ) {
		print STDERR "ERROR lincat-indexer : $cmd\n";
		$conn->rollback;
		$conn->disconnect();
		exit(1);
	}

	#-------------  Store all tuples returned
	# Rows are copied because DBI may reuse the array behind fetch()
	my @data = ();
	while (my $array_ref = $cur->fetch) {
		push(@data, [ @$array_ref ]);
	}

	$cur->finish;
	$conn->commit;

	return @data;

}


# Look in t_link_dic_<language> for the links between one dictionary word
# and one object, for a given table and field.
#
# Parameters:
#   $conn     - connected DBI database handle (AutoCommit off)
#   $language - language code used as suffix of the table name
#   $tbl      - value compared with s_table
#   $fld      - value compared with s_field
#   $ref      - value compared with s_ref_dic
#   $refobj   - value compared with s_ref_obj
#
# Returns the matching rows, each one an array reference holding s_ref_obj.
# In scalar context this is the number of rows, so the result can be used
# as an "already linked" test.
#
# When the query fails it is printed on STDERR, the transaction is rolled
# back, the connection is closed and the script exits with status 1.
sub select_link {
	my ($conn, $language, $tbl, $fld, $ref, $refobj) = @_;

	#------------- Get all string
	my $cmd = "SELECT s_ref_obj FROM t_link_dic_$language WHERE s_ref_dic=? AND s_ref_obj=? AND s_table=? AND s_field=?;";
	# Values are bound, an undefined one being sent as an empty string
	my @bind = map { defined($_) ? $_ : '' } ($ref, $refobj, $tbl, $fld);

	#-------------  Prepare and execute the query
	my $cur = $conn->prepare($cmd);
	if ( !$cur || !$cur->execute(@bind) ) {
		print STDERR "ERROR lincat-indexer : $cmd\n";
		$conn->rollback;
		$conn->disconnect();
		exit(1);
	}

	#-------------  Store all tuples returned
	# Rows are copied because DBI may reuse the array behind fetch()
	my @data = ();
	while (my $array_ref = $cur->fetch) {
		push(@data, [ @$array_ref ]);
	}

	$cur->finish;
	$conn->commit;

	return @data;

}

# Look for a word in the dictionary table t_dic_<language>.
#
# Parameters:
#   $conn     - connected DBI database handle (AutoCommit off)
#   $language - language code used as suffix of the table name
#   $word     - value compared with s_word
#
# Returns the s_reference of the first matching row, or undef when the word
# is not in the dictionary.
#
# When the query fails it is printed on STDERR, the transaction is rolled
# back, the connection is closed and the script exits with status 1.
sub select_word
{
	my ($conn, $language, $word) = @_;

	#------------- Get all string
	my $cmd = "SELECT s_reference FROM t_dic_$language WHERE s_word=?;";
	# The value is bound, an undefined one being sent as an empty string
	my @bind = ( defined($word) ? $word : '' );

	#-------------  Prepare and execute the query
	my $cur = $conn->prepare($cmd);
	if ( !$cur || !$cur->execute(@bind) ) {
		print STDERR "ERROR lincat-indexer : $cmd\n";
		$conn->rollback;
		$conn->disconnect();
		exit(1);
	}

	#-------------  Store the first tuple returned
	# List assignment: $data is the first column, undef when there is no row
	my ($data) = $cur->fetchrow_array;
	$cur->finish;
	$conn->commit;

	return $data;

}

# Add a word to the dictionary table t_dic_<language> and commit.
#
# Parameters:
#   $conn     - connected DBI database handle (AutoCommit off)
#   $language - language code used as suffix of the table name
#   $ref      - value stored in the first column (s_reference)
#   $word     - value stored in the second column (s_word)
#
# Returns nothing useful. When the insert fails the statement is printed on
# STDERR, the transaction is rolled back, the connection is closed and the
# script exits with status 1.
sub insert_word
{
	my ($conn, $language, $ref, $word) = @_;

	#------------- Set the insert query
	my $cmd = "INSERT INTO t_dic_$language VALUES (?, ?);";
	# Values are bound; an undefined one is stored as an empty string, not NULL
	my @bind = map { defined($_) ? $_ : '' } ($ref, $word);

	#-------------  Prepare and execute the query
	my $cur = $conn->prepare($cmd);
	if ( !$cur || !$cur->execute(@bind) ) {
		print STDERR "ERROR lincat-indexer : $cmd\n";
		$conn->rollback;
		$conn->disconnect();
		exit(1);
	}
	$conn->commit;
	$cur->finish;

}


# Add to t_link_dic_<language> a link between a dictionary word and an
# object, then commit.
#
# Parameters:
#   $conn     - connected DBI database handle (AutoCommit off)
#   $language - language code used as suffix of the table name
#   $tbl      - fifth value of the row (s_table)
#   $fld      - sixth value of the row (s_field)
#   $refdic   - first value of the row (s_ref_dic)
#   $refobj   - second value of the row (s_ref_obj)
#   $type     - third value of the row (s_type)
#   $usr      - fourth value of the row (s_ref_user)
#
# Returns nothing useful. When the insert fails the statement is printed on
# STDERR, the transaction is rolled back, the connection is closed and the
# script exits with status 1.
sub insert_link
{
	my ($conn, $language, $tbl, $fld, $refdic, $refobj, $type, $usr) = @_;

	#------------- Set the insert query
	my $cmd = "INSERT INTO t_link_dic_$language VALUES (?,?,?,?,?,?);";
	# Values are bound; an undefined one is stored as an empty string, not NULL
	my @bind = map { defined($_) ? $_ : '' } ($refdic, $refobj, $type, $usr, $tbl, $fld);

	#-------------  Prepare and execute the query
	my $cur = $conn->prepare($cmd);
	if ( !$cur || !$cur->execute(@bind) ) {
		print STDERR "ERROR lincat-indexer : $cmd\n";
		$conn->rollback;
		$conn->disconnect();
		exit(1);
	}
	$conn->commit;
	$cur->finish;

}


# Remove from t_link_dic_<language> every link of one object for a given
# table and field, then commit. Nothing is written on standard output.
#
# Parameters:
#   $conn     - connected DBI database handle (AutoCommit off)
#   $language - language code used as suffix of the table name
#   $tbl      - value compared with s_table
#   $fld      - value compared with s_field
#   $refobj   - value compared with s_ref_obj
#
# Returns nothing useful. When the delete fails the statement is printed on
# STDERR, the transaction is rolled back, the connection is closed and the
# script exits with status 1.
sub delete_link
{
	my ($conn, $language, $tbl, $fld, $refobj) = @_;

	#------------- Set the delete query
	my $cmd = "DELETE FROM t_link_dic_$language WHERE s_ref_obj=? AND s_table=? AND s_field=?;";
	# Values are bound, an undefined one being sent as an empty string
	my @bind = map { defined($_) ? $_ : '' } ($refobj, $tbl, $fld);

	#-------------  Prepare and execute the query
	my $cur = $conn->prepare($cmd);
	if ( !$cur || !$cur->execute(@bind) ) {
		print STDERR "ERROR lincat-indexer : $cmd\n";
		$conn->rollback;
		$conn->disconnect();
		exit(1);
	}
	$conn->commit;
	$cur->finish;

}
