Re: pg_upgrade with parallel tablespace copying

From: Bruce Momjian <bruce(at)momjian(dot)us>
To: PostgreSQL-development <pgsql-hackers(at)postgreSQL(dot)org>
Subject: Re: pg_upgrade with parallel tablespace copying
Date: 2013-01-09 13:58:26
Message-ID: 20130109135826.GF8789@momjian.us
Lists: pgsql-hackers


Slightly modified patch applied. This is my last planned pg_upgrade
change for 9.3.
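
To try it, just add --jobs to the pg_upgrade call, the same way you already
would for parallel dump/restore.  For example, an invocation might look
roughly like this (the binary and data directory paths are only placeholders;
adjust them to your layout):

    pg_upgrade --old-bindir=/usr/pgsql-9.2/bin --new-bindir=/usr/pgsql-9.3/bin \
               --old-datadir=/u/pgsql.old/data --new-datadir=/u/pgsql/data \
               --jobs=4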

---------------------------------------------------------------------------

On Mon, Jan 7, 2013 at 10:51:21PM -0500, Bruce Momjian wrote:
> Pg_upgrade by default (without --link) copies heap/index files from the
> old cluster to the new cluster. This patch implements parallel heap/index
> file copying in pg_upgrade using the --jobs option, reusing the same
> infrastructure as pg_upgrade's parallel dump/restore. Here are the
> performance results:
>
>              --- seconds ---
>      GB        git     patched
>       2      62.09       63.75
>       4      95.93      107.22
>       8     194.96      195.29
>      16     494.38      348.93
>      32     983.28      644.23
>      64    2227.73     1244.08
>     128    4735.83     2547.09
>
> Because of the kernel cache, you only see a big win once the amount of
> copied data exceeds the kernel cache. For testing, I used a 24GB, 16-core
> machine with two magnetic disks, one tablespace on each. Using more
> tablespaces would yield larger improvements. My test script is
> attached.
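
(For anyone reproducing these numbers: the big win only appears with a cold
kernel cache, so the attached script flushes the page cache between runs,
essentially:

    # as root: drop clean page/dentry/inode caches before timing the copy
    echo 3 > /proc/sys/vm/drop_caches
    sleep 15        # give the system a moment to settle

The 15-second pause is arbitrary; writing to drop_caches requires root.)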
>
> I consider this patch ready for application. This is the last
> pg_upgrade performance improvement idea I am considering.
>
> --
> Bruce Momjian <bruce(at)momjian(dot)us> http://momjian.us
> EnterpriseDB http://enterprisedb.com
>
> + It's impossible for everything to be true. +

> diff --git a/contrib/pg_upgrade/check.c b/contrib/pg_upgrade/check.c
> new file mode 100644
> index 59f8fd0..1780788
> *** a/contrib/pg_upgrade/check.c
> --- b/contrib/pg_upgrade/check.c
> *************** create_script_for_old_cluster_deletion(c
> *** 606,612 ****
> fprintf(script, RMDIR_CMD " %s\n", fix_path_separator(old_cluster.pgdata));
>
> /* delete old cluster's alternate tablespaces */
> ! for (tblnum = 0; tblnum < os_info.num_tablespaces; tblnum++)
> {
> /*
> * Do the old cluster's per-database directories share a directory
> --- 606,612 ----
> fprintf(script, RMDIR_CMD " %s\n", fix_path_separator(old_cluster.pgdata));
>
> /* delete old cluster's alternate tablespaces */
> ! for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++)
> {
> /*
> * Do the old cluster's per-database directories share a directory
> *************** create_script_for_old_cluster_deletion(c
> *** 621,634 ****
> /* remove PG_VERSION? */
> if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
> fprintf(script, RM_CMD " %s%s%cPG_VERSION\n",
> ! fix_path_separator(os_info.tablespaces[tblnum]),
> fix_path_separator(old_cluster.tablespace_suffix),
> PATH_SEPARATOR);
>
> for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
> {
> fprintf(script, RMDIR_CMD " %s%s%c%d\n",
> ! fix_path_separator(os_info.tablespaces[tblnum]),
> fix_path_separator(old_cluster.tablespace_suffix),
> PATH_SEPARATOR, old_cluster.dbarr.dbs[dbnum].db_oid);
> }
> --- 621,634 ----
> /* remove PG_VERSION? */
> if (GET_MAJOR_VERSION(old_cluster.major_version) <= 804)
> fprintf(script, RM_CMD " %s%s%cPG_VERSION\n",
> ! fix_path_separator(os_info.old_tablespaces[tblnum]),
> fix_path_separator(old_cluster.tablespace_suffix),
> PATH_SEPARATOR);
>
> for (dbnum = 0; dbnum < old_cluster.dbarr.ndbs; dbnum++)
> {
> fprintf(script, RMDIR_CMD " %s%s%c%d\n",
> ! fix_path_separator(os_info.old_tablespaces[tblnum]),
> fix_path_separator(old_cluster.tablespace_suffix),
> PATH_SEPARATOR, old_cluster.dbarr.dbs[dbnum].db_oid);
> }
> *************** create_script_for_old_cluster_deletion(c
> *** 640,646 ****
> * or a version-specific subdirectory.
> */
> fprintf(script, RMDIR_CMD " %s%s\n",
> ! fix_path_separator(os_info.tablespaces[tblnum]),
> fix_path_separator(old_cluster.tablespace_suffix));
> }
>
> --- 640,646 ----
> * or a version-specific subdirectory.
> */
> fprintf(script, RMDIR_CMD " %s%s\n",
> ! fix_path_separator(os_info.old_tablespaces[tblnum]),
> fix_path_separator(old_cluster.tablespace_suffix));
> }
>
> diff --git a/contrib/pg_upgrade/info.c b/contrib/pg_upgrade/info.c
> new file mode 100644
> index 0c11ff8..7fd4584
> *** a/contrib/pg_upgrade/info.c
> --- b/contrib/pg_upgrade/info.c
> *************** create_rel_filename_map(const char *old_
> *** 106,125 ****
> * relation belongs to the default tablespace, hence relfiles should
> * exist in the data directories.
> */
> ! snprintf(map->old_dir, sizeof(map->old_dir), "%s/base/%u", old_data,
> ! old_db->db_oid);
> ! snprintf(map->new_dir, sizeof(map->new_dir), "%s/base/%u", new_data,
> ! new_db->db_oid);
> }
> else
> {
> /* relation belongs to a tablespace, so use the tablespace location */
> ! snprintf(map->old_dir, sizeof(map->old_dir), "%s%s/%u", old_rel->tablespace,
> ! old_cluster.tablespace_suffix, old_db->db_oid);
> ! snprintf(map->new_dir, sizeof(map->new_dir), "%s%s/%u", new_rel->tablespace,
> ! new_cluster.tablespace_suffix, new_db->db_oid);
> }
>
> /*
> * old_relfilenode might differ from pg_class.oid (and hence
> * new_relfilenode) because of CLUSTER, REINDEX, or VACUUM FULL.
> --- 106,130 ----
> * relation belongs to the default tablespace, hence relfiles should
> * exist in the data directories.
> */
> ! strlcpy(map->old_tablespace, old_data, sizeof(map->old_tablespace));
> ! strlcpy(map->new_tablespace, new_data, sizeof(map->new_tablespace));
> ! strlcpy(map->old_tablespace_suffix, "/base", sizeof(map->old_tablespace_suffix));
> ! strlcpy(map->new_tablespace_suffix, "/base", sizeof(map->new_tablespace_suffix));
> }
> else
> {
> /* relation belongs to a tablespace, so use the tablespace location */
> ! strlcpy(map->old_tablespace, old_rel->tablespace, sizeof(map->old_tablespace));
> ! strlcpy(map->new_tablespace, new_rel->tablespace, sizeof(map->new_tablespace));
> ! strlcpy(map->old_tablespace_suffix, old_cluster.tablespace_suffix,
> ! sizeof(map->old_tablespace_suffix));
> ! strlcpy(map->new_tablespace_suffix, new_cluster.tablespace_suffix,
> ! sizeof(map->new_tablespace_suffix));
> }
>
> + map->old_db_oid = old_db->db_oid;
> + map->new_db_oid = new_db->db_oid;
> +
> /*
> * old_relfilenode might differ from pg_class.oid (and hence
> * new_relfilenode) because of CLUSTER, REINDEX, or VACUUM FULL.
> diff --git a/contrib/pg_upgrade/parallel.c b/contrib/pg_upgrade/parallel.c
> new file mode 100644
> index 8ea36bc..d157511
> *** a/contrib/pg_upgrade/parallel.c
> --- b/contrib/pg_upgrade/parallel.c
> *************** typedef struct {
> *** 34,44 ****
> char log_file[MAXPGPATH];
> char opt_log_file[MAXPGPATH];
> char cmd[MAX_STRING];
> ! } thread_arg;
>
> ! thread_arg **thread_args;
>
> ! DWORD win32_exec_prog(thread_arg *args);
>
> #endif
>
> --- 34,57 ----
> char log_file[MAXPGPATH];
> char opt_log_file[MAXPGPATH];
> char cmd[MAX_STRING];
> ! } exec_thread_arg;
>
> ! typedef struct {
> ! DbInfoArr *old_db_arr;
> ! DbInfoArr *new_db_arr;
> ! char old_pgdata[MAXPGPATH];
> ! char new_pgdata[MAXPGPATH];
> ! char old_tablespace[MAXPGPATH];
> ! } transfer_thread_arg;
>
> ! exec_thread_arg **exec_thread_args;
> ! transfer_thread_arg **transfer_thread_args;
> !
> ! /* track current thread_args struct so reap_child() can be used for all cases */
> ! void **cur_thread_args;
> !
> ! DWORD win32_exec_prog(exec_thread_arg *args);
> ! DWORD win32_transfer_all_new_dbs(transfer_thread_arg *args);
>
> #endif
>
> *************** parallel_exec_prog(const char *log_file,
> *** 58,64 ****
> pid_t child;
> #else
> HANDLE child;
> ! thread_arg *new_arg;
> #endif
>
> va_start(args, fmt);
> --- 71,77 ----
> pid_t child;
> #else
> HANDLE child;
> ! exec_thread_arg *new_arg;
> #endif
>
> va_start(args, fmt);
> *************** parallel_exec_prog(const char *log_file,
> *** 71,77 ****
> else
> {
> /* parallel */
> !
> /* harvest any dead children */
> while (reap_child(false) == true)
> ;
> --- 84,92 ----
> else
> {
> /* parallel */
> ! #ifdef WIN32
> ! cur_thread_args = (void **)exec_thread_args;
> ! #endif
> /* harvest any dead children */
> while (reap_child(false) == true)
> ;
> *************** parallel_exec_prog(const char *log_file,
> *** 100,106 ****
> int i;
>
> thread_handles = pg_malloc(user_opts.jobs * sizeof(HANDLE));
> ! thread_args = pg_malloc(user_opts.jobs * sizeof(thread_arg *));
>
> /*
> * For safety and performance, we keep the args allocated during
> --- 115,121 ----
> int i;
>
> thread_handles = pg_malloc(user_opts.jobs * sizeof(HANDLE));
> ! exec_thread_args = pg_malloc(user_opts.jobs * sizeof(exec_thread_arg *));
>
> /*
> * For safety and performance, we keep the args allocated during
> *************** parallel_exec_prog(const char *log_file,
> *** 108,118 ****
> * in a thread different from the one that allocated it.
> */
> for (i = 0; i < user_opts.jobs; i++)
> ! thread_args[i] = pg_malloc(sizeof(thread_arg));
> }
>
> /* use first empty array element */
> ! new_arg = thread_args[parallel_jobs-1];
>
> /* Can only pass one pointer into the function, so use a struct */
> strcpy(new_arg->log_file, log_file);
> --- 123,133 ----
> * in a thread different from the one that allocated it.
> */
> for (i = 0; i < user_opts.jobs; i++)
> ! exec_thread_args[i] = pg_malloc(sizeof(exec_thread_arg));
> }
>
> /* use first empty array element */
> ! new_arg = exec_thread_args[parallel_jobs-1];
>
> /* Can only pass one pointer into the function, so use a struct */
> strcpy(new_arg->log_file, log_file);
> *************** parallel_exec_prog(const char *log_file,
> *** 134,140 ****
>
> #ifdef WIN32
> DWORD
> ! win32_exec_prog(thread_arg *args)
> {
> int ret;
>
> --- 149,155 ----
>
> #ifdef WIN32
> DWORD
> ! win32_exec_prog(exec_thread_arg *args)
> {
> int ret;
>
> *************** win32_exec_prog(thread_arg *args)
> *** 147,152 ****
> --- 162,273 ----
>
>
> /*
> + * parallel_transfer_all_new_dbs
> + *
> + * This has the same API as transfer_all_new_dbs, except it does parallel execution
> + * by transferring multiple tablespaces in parallel
> + */
> + void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
> + char *old_pgdata, char *new_pgdata,
> + char *old_tablespace)
> + {
> + #ifndef WIN32
> + pid_t child;
> + #else
> + HANDLE child;
> + transfer_thread_arg *new_arg;
> + #endif
> +
> + if (user_opts.jobs <= 1)
> + /* throw_error must be true to allow jobs */
> + transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, NULL);
> + else
> + {
> + /* parallel */
> + #ifdef WIN32
> + cur_thread_args = (void **)transfer_thread_args;
> + #endif
> + /* harvest any dead children */
> + while (reap_child(false) == true)
> + ;
> +
> + /* must we wait for a dead child? */
> + if (parallel_jobs >= user_opts.jobs)
> + reap_child(true);
> +
> + /* set this before we start the job */
> + parallel_jobs++;
> +
> + /* Ensure stdio state is quiesced before forking */
> + fflush(NULL);
> +
> + #ifndef WIN32
> + child = fork();
> + if (child == 0)
> + {
> + transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata,
> + old_tablespace);
> + /* if we take another exit path, it will be non-zero */
> + /* use _exit to skip atexit() functions */
> + _exit(0);
> + }
> + else if (child < 0)
> + /* fork failed */
> + pg_log(PG_FATAL, "could not create worker process: %s\n", strerror(errno));
> + #else
> + if (thread_handles == NULL)
> + {
> + int i;
> +
> + thread_handles = pg_malloc(user_opts.jobs * sizeof(HANDLE));
> + transfer_thread_args = pg_malloc(user_opts.jobs * sizeof(transfer_thread_arg *));
> +
> + /*
> + * For safety and performance, we keep the args allocated during
> + * the entire life of the process, and we don't free the args
> + * in a thread different from the one that allocated it.
> + */
> + for (i = 0; i < user_opts.jobs; i++)
> + transfer_thread_args[i] = pg_malloc(sizeof(transfer_thread_arg));
> + }
> +
> + /* use first empty array element */
> + new_arg = transfer_thread_args[parallel_jobs-1];
> +
> + /* Can only pass one pointer into the function, so use a struct */
> + new_arg->old_db_arr = old_db_arr;
> + new_arg->new_db_arr = new_db_arr;
> + strcpy(new_arg->old_pgdata, old_pgdata);
> + strcpy(new_arg->new_pgdata, new_pgdata);
> + strcpy(new_arg->old_tablespace, old_tablespace);
> +
> + child = (HANDLE) _beginthreadex(NULL, 0, (void *) win32_transfer_all_new_dbs,
> + new_arg, 0, NULL);
> + if (child == 0)
> + pg_log(PG_FATAL, "could not create worker thread: %s\n", strerror(errno));
> +
> + thread_handles[parallel_jobs-1] = child;
> + #endif
> + }
> +
> + return;
> + }
> +
> +
> + #ifdef WIN32
> + DWORD
> + win32_transfer_all_new_dbs(transfer_thread_arg *args)
> + {
> + transfer_all_new_dbs(args->old_db_arr, args->new_db_arr, args->old_pgdata,
> + args->new_pgdata, args->old_tablespace);
> +
> + /* terminates thread */
> + return 0;
> + }
> + #endif
> +
> +
> + /*
> * collect status from a completed worker child
> */
> bool
> *************** reap_child(bool wait_for_child)
> *** 195,201 ****
> /* Move last slot into dead child's position */
> if (thread_num != parallel_jobs - 1)
> {
> ! thread_arg *tmp_args;
>
> thread_handles[thread_num] = thread_handles[parallel_jobs - 1];
>
> --- 316,322 ----
> /* Move last slot into dead child's position */
> if (thread_num != parallel_jobs - 1)
> {
> ! void *tmp_args;
>
> thread_handles[thread_num] = thread_handles[parallel_jobs - 1];
>
> *************** reap_child(bool wait_for_child)
> *** 205,213 ****
> * reused by the next created thread. Instead, the new thread
> * will use the arg struct of the thread that just died.
> */
> ! tmp_args = thread_args[thread_num];
> ! thread_args[thread_num] = thread_args[parallel_jobs - 1];
> ! thread_args[parallel_jobs - 1] = tmp_args;
> }
> #endif
>
> --- 326,334 ----
> * reused by the next created thread. Instead, the new thread
> * will use the arg struct of the thread that just died.
> */
> ! tmp_args = cur_thread_args[thread_num];
> ! cur_thread_args[thread_num] = cur_thread_args[parallel_jobs - 1];
> ! cur_thread_args[parallel_jobs - 1] = tmp_args;
> }
> #endif
>
> diff --git a/contrib/pg_upgrade/pg_upgrade.c b/contrib/pg_upgrade/pg_upgrade.c
> new file mode 100644
> index 70c749d..85997e5
> *** a/contrib/pg_upgrade/pg_upgrade.c
> --- b/contrib/pg_upgrade/pg_upgrade.c
> *************** main(int argc, char **argv)
> *** 133,139 ****
> if (user_opts.transfer_mode == TRANSFER_MODE_LINK)
> disable_old_cluster();
>
> ! transfer_all_new_dbs(&old_cluster.dbarr, &new_cluster.dbarr,
> old_cluster.pgdata, new_cluster.pgdata);
>
> /*
> --- 133,139 ----
> if (user_opts.transfer_mode == TRANSFER_MODE_LINK)
> disable_old_cluster();
>
> ! transfer_all_new_tablespaces(&old_cluster.dbarr, &new_cluster.dbarr,
> old_cluster.pgdata, new_cluster.pgdata);
>
> /*
> diff --git a/contrib/pg_upgrade/pg_upgrade.h b/contrib/pg_upgrade/pg_upgrade.h
> new file mode 100644
> index c1a2f53..d5c3fa9
> *** a/contrib/pg_upgrade/pg_upgrade.h
> --- b/contrib/pg_upgrade/pg_upgrade.h
> *************** typedef struct
> *** 134,141 ****
> */
> typedef struct
> {
> ! char old_dir[MAXPGPATH];
> ! char new_dir[MAXPGPATH];
>
> /*
> * old/new relfilenodes might differ for pg_largeobject(_metadata) indexes
> --- 134,145 ----
> */
> typedef struct
> {
> ! char old_tablespace[MAXPGPATH];
> ! char new_tablespace[MAXPGPATH];
> ! char old_tablespace_suffix[MAXPGPATH];
> ! char new_tablespace_suffix[MAXPGPATH];
> ! Oid old_db_oid;
> ! Oid new_db_oid;
>
> /*
> * old/new relfilenodes might differ for pg_largeobject(_metadata) indexes
> *************** typedef struct
> *** 276,283 ****
> const char *progname; /* complete pathname for this program */
> char *exec_path; /* full path to my executable */
> char *user; /* username for clusters */
> ! char **tablespaces; /* tablespaces */
> ! int num_tablespaces;
> char **libraries; /* loadable libraries */
> int num_libraries;
> ClusterInfo *running_cluster;
> --- 280,287 ----
> const char *progname; /* complete pathname for this program */
> char *exec_path; /* full path to my executable */
> char *user; /* username for clusters */
> ! char **old_tablespaces; /* tablespaces */
> ! int num_old_tablespaces;
> char **libraries; /* loadable libraries */
> int num_libraries;
> ClusterInfo *running_cluster;
> *************** void get_sock_dir(ClusterInfo *cluster,
> *** 398,406 ****
> /* relfilenode.c */
>
> void get_pg_database_relfilenode(ClusterInfo *cluster);
> ! void transfer_all_new_dbs(DbInfoArr *olddb_arr,
> ! DbInfoArr *newdb_arr, char *old_pgdata, char *new_pgdata);
> !
>
> /* tablespace.c */
>
> --- 402,412 ----
> /* relfilenode.c */
>
> void get_pg_database_relfilenode(ClusterInfo *cluster);
> ! void transfer_all_new_tablespaces(DbInfoArr *old_db_arr,
> ! DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata);
> ! void transfer_all_new_dbs(DbInfoArr *old_db_arr,
> ! DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata,
> ! char *old_tablespace);
>
> /* tablespace.c */
>
> *************** void old_8_3_invalidate_bpchar_pattern_o
> *** 464,472 ****
> char *old_8_3_create_sequence_script(ClusterInfo *cluster);
>
> /* parallel.c */
> ! void parallel_exec_prog(const char *log_file, const char *opt_log_file,
> const char *fmt,...)
> __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
> !
> ! bool reap_child(bool wait_for_child);
>
> --- 470,480 ----
> char *old_8_3_create_sequence_script(ClusterInfo *cluster);
>
> /* parallel.c */
> ! void parallel_exec_prog(const char *log_file, const char *opt_log_file,
> const char *fmt,...)
> __attribute__((format(PG_PRINTF_ATTRIBUTE, 3, 4)));
> ! void parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
> ! char *old_pgdata, char *new_pgdata,
> ! char *old_tablespace);
> ! bool reap_child(bool wait_for_child);
>
> diff --git a/contrib/pg_upgrade/relfilenode.c b/contrib/pg_upgrade/relfilenode.c
> new file mode 100644
> index 9d0d5a0..cfdd39c
> *** a/contrib/pg_upgrade/relfilenode.c
> --- b/contrib/pg_upgrade/relfilenode.c
> ***************
> *** 16,42 ****
>
>
> static void transfer_single_new_db(pageCnvCtx *pageConverter,
> ! FileNameMap *maps, int size);
> static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
> const char *suffix);
>
>
> /*
> * transfer_all_new_dbs()
> *
> * Responsible for upgrading all database. invokes routines to generate mappings and then
> * physically link the databases.
> */
> void
> ! transfer_all_new_dbs(DbInfoArr *old_db_arr,
> ! DbInfoArr *new_db_arr, char *old_pgdata, char *new_pgdata)
> {
> int old_dbnum,
> new_dbnum;
>
> - pg_log(PG_REPORT, "%s user relation files\n",
> - user_opts.transfer_mode == TRANSFER_MODE_LINK ? "Linking" : "Copying");
> -
> /* Scan the old cluster databases and transfer their files */
> for (old_dbnum = new_dbnum = 0;
> old_dbnum < old_db_arr->ndbs;
> --- 16,81 ----
>
>
> static void transfer_single_new_db(pageCnvCtx *pageConverter,
> ! FileNameMap *maps, int size, char *old_tablespace);
> static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
> const char *suffix);
>
>
> /*
> + * transfer_all_new_tablespaces()
> + *
> + * Responsible for upgrading all databases.  Invokes routines to generate mappings and then
> + * physically link the databases.
> + */
> + void
> + transfer_all_new_tablespaces(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
> + char *old_pgdata, char *new_pgdata)
> + {
> + pg_log(PG_REPORT, "%s user relation files\n",
> + user_opts.transfer_mode == TRANSFER_MODE_LINK ? "Linking" : "Copying");
> +
> + /*
> + * Transferring files by tablespace is tricky because a single database
> + * can use multiple tablespaces. For non-parallel mode, we just pass a
> + * NULL tablespace path, which matches all tablespaces. In parallel mode,
> + * we pass the default tablespace and all user-created tablespaces
> + * and let those operations happen in parallel.
> + */
> + if (user_opts.jobs <= 1)
> + parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata,
> + new_pgdata, NULL);
> + else
> + {
> + int tblnum;
> +
> + for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++)
> + parallel_transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata,
> + new_pgdata, os_info.old_tablespaces[tblnum]);
> + /* reap all children */
> + while (reap_child(true) == true)
> + ;
> + }
> +
> + end_progress_output();
> + check_ok();
> +
> + return;
> + }
> +
> +
> + /*
> * transfer_all_new_dbs()
> *
> * Responsible for upgrading all database. invokes routines to generate mappings and then
> * physically link the databases.
> */
> void
> ! transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
> ! char *old_pgdata, char *new_pgdata, char *old_tablespace)
> {
> int old_dbnum,
> new_dbnum;
>
> /* Scan the old cluster databases and transfer their files */
> for (old_dbnum = new_dbnum = 0;
> old_dbnum < old_db_arr->ndbs;
> *************** transfer_all_new_dbs(DbInfoArr *old_db_a
> *** 75,89 ****
> #ifdef PAGE_CONVERSION
> pageConverter = setupPageConverter();
> #endif
> ! transfer_single_new_db(pageConverter, mappings, n_maps);
>
> pg_free(mappings);
> }
> }
>
> - end_progress_output();
> - check_ok();
> -
> return;
> }
>
> --- 114,126 ----
> #ifdef PAGE_CONVERSION
> pageConverter = setupPageConverter();
> #endif
> ! transfer_single_new_db(pageConverter, mappings, n_maps,
> ! old_tablespace);
>
> pg_free(mappings);
> }
> }
>
> return;
> }
>
> *************** get_pg_database_relfilenode(ClusterInfo
> *** 125,131 ****
> */
> static void
> transfer_single_new_db(pageCnvCtx *pageConverter,
> ! FileNameMap *maps, int size)
> {
> int mapnum;
> bool vm_crashsafe_match = true;
> --- 162,168 ----
> */
> static void
> transfer_single_new_db(pageCnvCtx *pageConverter,
> ! FileNameMap *maps, int size, char *old_tablespace)
> {
> int mapnum;
> bool vm_crashsafe_match = true;
> *************** transfer_single_new_db(pageCnvCtx *pageC
> *** 140,157 ****
>
> for (mapnum = 0; mapnum < size; mapnum++)
> {
> ! /* transfer primary file */
> ! transfer_relfile(pageConverter, &maps[mapnum], "");
> !
> ! /* fsm/vm files added in PG 8.4 */
> ! if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
> {
> ! /*
> ! * Copy/link any fsm and vm files, if they exist
> ! */
> ! transfer_relfile(pageConverter, &maps[mapnum], "_fsm");
> ! if (vm_crashsafe_match)
> ! transfer_relfile(pageConverter, &maps[mapnum], "_vm");
> }
> }
> }
> --- 177,198 ----
>
> for (mapnum = 0; mapnum < size; mapnum++)
> {
> ! if (old_tablespace == NULL ||
> ! strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
> {
> ! /* transfer primary file */
> ! transfer_relfile(pageConverter, &maps[mapnum], "");
> !
> ! /* fsm/vm files added in PG 8.4 */
> ! if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
> ! {
> ! /*
> ! * Copy/link any fsm and vm files, if they exist
> ! */
> ! transfer_relfile(pageConverter, &maps[mapnum], "_fsm");
> ! if (vm_crashsafe_match)
> ! transfer_relfile(pageConverter, &maps[mapnum], "_vm");
> ! }
> }
> }
> }
> *************** transfer_relfile(pageCnvCtx *pageConvert
> *** 187,196 ****
> else
> snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno);
>
> ! snprintf(old_file, sizeof(old_file), "%s/%u%s%s", map->old_dir,
> ! map->old_relfilenode, type_suffix, extent_suffix);
> ! snprintf(new_file, sizeof(new_file), "%s/%u%s%s", map->new_dir,
> ! map->new_relfilenode, type_suffix, extent_suffix);
>
> /* Is it an extent, fsm, or vm file? */
> if (type_suffix[0] != '\0' || segno != 0)
> --- 228,239 ----
> else
> snprintf(extent_suffix, sizeof(extent_suffix), ".%d", segno);
>
> ! snprintf(old_file, sizeof(old_file), "%s%s/%u/%u%s%s", map->old_tablespace,
> ! map->old_tablespace_suffix, map->old_db_oid, map->old_relfilenode,
> ! type_suffix, extent_suffix);
> ! snprintf(new_file, sizeof(new_file), "%s%s/%u/%u%s%s", map->new_tablespace,
> ! map->new_tablespace_suffix, map->new_db_oid, map->new_relfilenode,
> ! type_suffix, extent_suffix);
>
> /* Is it an extent, fsm, or vm file? */
> if (type_suffix[0] != '\0' || segno != 0)
> *************** transfer_relfile(pageCnvCtx *pageConvert
> *** 239,241 ****
> --- 282,285 ----
>
> return;
> }
> +
> diff --git a/contrib/pg_upgrade/tablespace.c b/contrib/pg_upgrade/tablespace.c
> new file mode 100644
> index a93c517..321738d
> *** a/contrib/pg_upgrade/tablespace.c
> --- b/contrib/pg_upgrade/tablespace.c
> *************** init_tablespaces(void)
> *** 23,29 ****
> set_tablespace_directory_suffix(&old_cluster);
> set_tablespace_directory_suffix(&new_cluster);
>
> ! if (os_info.num_tablespaces > 0 &&
> strcmp(old_cluster.tablespace_suffix, new_cluster.tablespace_suffix) == 0)
> pg_log(PG_FATAL,
> "Cannot upgrade to/from the same system catalog version when\n"
> --- 23,29 ----
> set_tablespace_directory_suffix(&old_cluster);
> set_tablespace_directory_suffix(&new_cluster);
>
> ! if (os_info.num_old_tablespaces > 0 &&
> strcmp(old_cluster.tablespace_suffix, new_cluster.tablespace_suffix) == 0)
> pg_log(PG_FATAL,
> "Cannot upgrade to/from the same system catalog version when\n"
> *************** get_tablespace_paths(void)
> *** 57,72 ****
>
> res = executeQueryOrDie(conn, "%s", query);
>
> ! if ((os_info.num_tablespaces = PQntuples(res)) != 0)
> ! os_info.tablespaces = (char **) pg_malloc(
> ! os_info.num_tablespaces * sizeof(char *));
> else
> ! os_info.tablespaces = NULL;
>
> i_spclocation = PQfnumber(res, "spclocation");
>
> ! for (tblnum = 0; tblnum < os_info.num_tablespaces; tblnum++)
> ! os_info.tablespaces[tblnum] = pg_strdup(
> PQgetvalue(res, tblnum, i_spclocation));
>
> PQclear(res);
> --- 57,72 ----
>
> res = executeQueryOrDie(conn, "%s", query);
>
> ! if ((os_info.num_old_tablespaces = PQntuples(res)) != 0)
> ! os_info.old_tablespaces = (char **) pg_malloc(
> ! os_info.num_old_tablespaces * sizeof(char *));
> else
> ! os_info.old_tablespaces = NULL;
>
> i_spclocation = PQfnumber(res, "spclocation");
>
> ! for (tblnum = 0; tblnum < os_info.num_old_tablespaces; tblnum++)
> ! os_info.old_tablespaces[tblnum] = pg_strdup(
> PQgetvalue(res, tblnum, i_spclocation));
>
> PQclear(res);
> diff --git a/doc/src/sgml/pgupgrade.sgml b/doc/src/sgml/pgupgrade.sgml
> new file mode 100644
> index 53781e4..3e5d548
> *** a/doc/src/sgml/pgupgrade.sgml
> --- b/doc/src/sgml/pgupgrade.sgml
> *************** NET STOP pgsql-8.3 (<productname>Postgr
> *** 342,351 ****
>
> <para>
> The <option>--jobs</> option allows multiple CPU cores to be used
> ! to dump and reload database schemas in parallel; a good place to
> ! start is the number of CPU cores on the server. This option can
> ! dramatically reduce the time to upgrade a multi-database server
> ! running on a multiprocessor machine.
> </para>
>
> <para>
> --- 342,353 ----
>
> <para>
> The <option>--jobs</> option allows multiple CPU cores to be used
> ! for file copy operations and to dump and reload database schemas in
> ! parallel; a good place to start is the number of CPU cores on the
> ! server, or the number of tablespaces if not using the
> ! <option>--link</> option. This option can dramatically reduce the
> ! time to upgrade a multi-database server running on a multiprocessor
> ! machine.
> </para>
>
> <para>

> :
>
> . traprm
>
> export QUIET=$((QUIET + 1))
>
> > /rtmp/out
>
> OLDREL=9.2
> NEWREL=9.3
> BRANCH=tablespace
> SSD="f"
>
> export PGOPTIONS="-c synchronous_commit=off"
>
> for CYCLES in 2 4 8 16 32 64 128
> do
>     echo "$CYCLES" >> /rtmp/out
>     for JOBLIMIT in 1 2
>     do
>         cd /pgsql/$REL
>         pgsw $BRANCH
>         cd -
>         tools/setup $OLDREL $NEWREL
>         sleep 2
>         [ "$SSD" = "f" ] && tools/mv_to_archive
>
>         # need for +16k
>         for CONFIG in /u/pgsql.old/data/postgresql.conf /u/pgsql/data/postgresql.conf
>         do
>             pipe sed 's/#max_locks_per_transaction = 64/max_locks_per_transaction = 64000/' "$CONFIG"
>             pipe sed 's/shared_buffers = 128MB/shared_buffers = 1GB/' "$CONFIG"
>         done
>
>         pgstart -w /u/pgsql.old/data
>         for DIR in /archive/tmp/t1 /backup0/tmp/t2
>         do
>             rm -rf "$DIR"
>             mkdir "$DIR"
>             chown postgres "$DIR"
>             chmod 0700 "$DIR"
>         done
>
>         echo "CREATE TABLESPACE t1 LOCATION '/archive/tmp/t1';" | sql --echo-all test
>         echo "CREATE TABLESPACE t2 LOCATION '/backup0/tmp/t2';" | sql --echo-all test
>
>         for SPNO in $(jot 2)
>         do
>             for TBLNO in $(jot $(($CYCLES / 2)) )
>             do
>                 echo "CREATE TABLE test${SPNO}_${TBLNO} (x TEXT) TABLESPACE t$SPNO;"
>                 echo "ALTER TABLE test${SPNO}_${TBLNO} ALTER COLUMN x SET STORAGE EXTERNAL;"
>                 echo "INSERT INTO test${SPNO}_${TBLNO} SELECT repeat('x', 999000000);"
>             done |
>             PGOPTIONS="-c synchronous_commit=off" sql --single-transaction --echo-all test
>         done
>         pgstop /u/pgsql.old/data
>         sleep 2
>         # clear cache
>         echo 3 > /proc/sys/vm/drop_caches
>         # allow system to repopulate
>         sleep 15
>         /usr/bin/time --output=/rtmp/out --append --format '%e' tools/upgrade -j $JOBLIMIT || exit
>         sleep 2
>     done
> done
>
> bell

>

--
Bruce Momjian <bruce(at)momjian(dot)us> http://momjian.us
EnterpriseDB http://enterprisedb.com

+ It's impossible for everything to be true. +
