#!/usr/bin/perl
#
# Populate the "ftstest" PostgreSQL table with randomly generated documents
# for full-text-search experiments.
#
# Each document is a sequence of words drawn uniformly from words.txt, plus a
# set of marker terms inserted at one random position:
#   * "commonterm" appears exactly once in every document;
#   * "commontermNN" / "raretermNN" appear in roughly NN% of the documents.
# The markers are nested: one random draw per document decides all of them, so
# a document that contains "rareterm1" also contains every more frequent marker.

use strict;
use warnings;

use DBI;
use File::Slurp;

# One word per line; strip the trailing newlines.
chomp(my @words = read_file("words.txt"));
my $word_count = scalar(@words);
die "words.txt contains no words\n" unless $word_count;

my $avg    = 200;      # earliest position at which the marker terms may go
my $spread = 50;       # documents hold $avg + $spread/2 random words
my $docs   = 50000;    # number of documents to insert

# DBI attribute names are case-sensitive: the attribute is "AutoCommit".
# RaiseError makes a failed connect/prepare/execute abort the script instead of
# only printing a warning and carrying on.
my $dbh = DBI->connect(
    "dbi:Pg:dbname=ftstest", "jk", "jk",
    { AutoCommit => 1, RaiseError => 1 },
);

# NOTE(review): the original DDL heredoc was lost when this file was extracted
# (only "my $create = <do($create);" survived). The statement below is a
# minimal reconstruction derived from the INSERT further down, which needs the
# columns "body" and "body_fts". Confirm against the original schema -- it may
# have had additional columns, a DROP TABLE, or an index on body_fts.
my $create = <<'END_SQL';
create table ftstest (
    body     text,
    body_fts tsvector
)
END_SQL
$dbh->do($create);

my $sth = $dbh->prepare(
    "insert into ftstest(body,body_fts) values(?,to_tsvector('english',?))"
);

# Marker terms and the probability with which each one is added to a document.
my @marker_terms = (
    [ "commonterm80", 0.8  ],
    [ "commonterm60", 0.6  ],
    [ "commonterm40", 0.4  ],
    [ "rareterm30",   0.3  ],
    [ "rareterm20",   0.2  ],
    [ "rareterm10",   0.1  ],
    [ "rareterm5",    0.05 ],
    [ "rareterm1",    0.01 ],
);

# Number of random words per document (loop-invariant, so computed once).
my $doc_length = $spread / 2 + $avg;

for my $doc_number (1 .. $docs) {
    my @doc = ();

    # Position of the marker terms. It always lies in
    # [$avg, $avg + $spread/2 - 1], i.e. inside the document.
    my $commonpos = int(rand($spread / 2)) + $avg;

    for (my $j = 0; $j < $doc_length; $j++) {
        push @doc, $words[ int(rand($word_count)) ];

        next unless $j == $commonpos;

        # This should only match once.
        push @doc, "commonterm";

        # A single draw decides every marker, which is what nests them.
        my $rand = rand();
        for my $marker (@marker_terms) {
            my ($term, $probability) = @{$marker};
            push @doc, $term if $rand < $probability;
        }
    }

    my $doc = join(" ", @doc);
    $sth->execute($doc, $doc);
}

# Refresh planner statistics now that the table is populated.
$dbh->do("analyze");
$dbh->disconnect;