From e9b8d4579c4adf0582f739327aaa3b9877311633 Mon Sep 17 00:00:00 2001 From: Rintaro Ikeda Date: Tue, 1 Jul 2025 14:18:44 +0900 Subject: [PATCH v5 1/2] When the option is set, client rolls back the failed transaction and starts a new one when its transaction fails due to the reason other than the deadlock and serialization failure. --- doc/src/sgml/ref/pgbench.sgml | 70 +++++++++++++++----- src/bin/pgbench/pgbench.c | 51 ++++++++++++-- src/bin/pgbench/t/001_pgbench_with_server.pl | 22 ++++++ 3 files changed, 121 insertions(+), 22 deletions(-) diff --git a/doc/src/sgml/ref/pgbench.sgml b/doc/src/sgml/ref/pgbench.sgml index ab252d9fc74..cc5ab173f2f 100644 --- a/doc/src/sgml/ref/pgbench.sgml +++ b/doc/src/sgml/ref/pgbench.sgml @@ -77,8 +77,8 @@ tps = 896.967014 (without initial connection time) failed before completion or some SQL command(s) failed. (In mode, only the actual number of transactions is printed.) The next line reports the number of failed transactions due to - serialization or deadlock errors (see - for more information). + serialization or deadlock errors by default (see + for more information). The last line reports the number of transactions per second. @@ -790,6 +790,9 @@ pgbench options d deadlock failures; + + other failures; + See for more information. @@ -914,6 +917,26 @@ pgbench options d + + + + + Allows clients to continue their run even if an SQL statement fails due to + errors other than serialization or deadlock. Unlike serialization and deadlock + failures, clients do not retry the same transactions but start new transaction. + This option is useful when your custom script may raise errors due to some + reason like unique constraints violation. Without this option, the client is + aborted after such errors. + + + Note that serialization and deadlock failures never cause the client to be + aborted even after clients retries times by + default, so they are not affected by this option. + See for more information. 
+ + + + @@ -2409,8 +2432,8 @@ END; will be reported as failed. If you use the option, the time of the failed transaction will be reported as - serialization or - deadlock depending on the type of failure (see + serialization, deadlock, or + other depending on the type of failure (see for more information). @@ -2638,6 +2661,16 @@ END; + + + other_sql_failures + + + number of transactions that got a SQL error + (zero unless is specified) + + + @@ -2646,8 +2679,8 @@ END; pgbench --aggregate-interval=10 --time=20 --client=10 --log --rate=1000 --latency-limit=10 --failures-detailed --max-tries=10 test -1650260552 5178 26171317 177284491527 1136 44462 2647617 7321113867 0 9866 64 7564 28340 4148 0 -1650260562 4808 25573984 220121792172 1171 62083 3037380 9666800914 0 9998 598 7392 26621 4527 0 +1650260552 5178 26171317 177284491527 1136 44462 2647617 7321113867 0 9866 64 7564 28340 4148 0 0 +1650260562 4808 25573984 220121792172 1171 62083 3037380 9666800914 0 9998 598 7392 26621 4527 0 0 @@ -2839,9 +2872,11 @@ statement latencies in milliseconds, failures and retries: is specified. Otherwise in the worst case they only lead to the abortion of the failed client while other clients continue their run (but some client errors are handled without - an abortion of the client and reported separately, see below). Later in - this section it is assumed that the discussed errors are only the - direct client errors and they are not internal + an abortion of the client and reported separately, see below). When + is specified, the client + continues to process new transactions even if it encounters an error. + Later in this section it is assumed that the discussed errors are only + the direct client errors and they are not internal pgbench errors. @@ -2853,12 +2888,14 @@ statement latencies in milliseconds, failures and retries: connection with the database server was lost or the end of script was reached without completing the last transaction. 
In addition, if execution of an SQL or meta command fails for reasons other than serialization or deadlock errors, - the client is aborted. Otherwise, if an SQL command fails with serialization or - deadlock errors, the client is not aborted. In such cases, the current - transaction is rolled back, which also includes setting the client variables - as they were before the run of this transaction (it is assumed that one - transaction script contains only one transaction; see - for more information). + the client is aborted by default. However, if the --continue-on-error option + is specified, the client does not abort and proceeds to the next transaction + regardless of the error. This case is reported as other failures in the output. + Otherwise, if an SQL command fails with serialization or deadlock errors, the + client is not aborted. In such cases, the current transaction is rolled back, + which also includes setting the client variables as they were before the run + of this transaction (it is assumed that one transaction script contains only + one transaction; see for more information). Transactions with serialization or deadlock errors are repeated after rollbacks until they complete successfully or reach the maximum number of tries (specified by the option) / the maximum @@ -2898,7 +2935,8 @@ statement latencies in milliseconds, failures and retries: The main report contains the number of failed transactions. If the - option is not equal to 1, the main report also + option is not equal to 1 and + is not specified, the main report also contains statistics related to retries: the total number of retried transactions and total number of retries. The per-script report inherits all these fields from the main report. 
The per-statement report displays retry diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index 497a936c141..15207290811 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -402,7 +402,8 @@ typedef struct StatsData * directly successful transactions (they were successfully completed on * the first try). * - * A failed transaction is defined as unsuccessfully retried transactions. + * A failed transaction is defined as unsuccessfully retried transactions + * unless continue-on-error option is specified. * It can be one of two types: * * failed (the number of failed transactions) = @@ -411,6 +412,12 @@ typedef struct StatsData * 'deadlock_failures' (they got a deadlock error and were not * successfully retried). * + * When continue-on-error option is specified, + * failed (the number of failed transactions) = + * 'serialization_failures' + 'deadlock_failures' + + * 'other_sql_failures' (they got a error when continue-on-error option + * was specified). + * * If the transaction was retried after a serialization or a deadlock * error this does not guarantee that this retry was successful. 
Thus
  *
@@ -440,6 +447,11 @@ typedef struct StatsData
 	int64		deadlock_failures;	/* number of transactions that were not
 									 * successfully retried after a deadlock
 									 * error */
+	int64		other_sql_failures; /* number of failed transactions for
+									 * reasons other than
+									 * serialization/deadlock failure, which
+									 * is enabled if --continue-on-error is
+									 * used */
 	SimpleStats latency;
 	SimpleStats lag;
 } StatsData;
@@ -770,6 +782,7 @@ static int64 total_weight = 0;
 
 static bool verbose_errors = false; /* print verbose messages of all errors */
 static bool exit_on_abort = false;	/* exit when any client is aborted */
+static bool continue_on_error = false;	/* continue after errors */
 
 /* Builtin test scripts */
 typedef struct BuiltinScript
@@ -954,6 +967,7 @@ usage(void)
 		   "  --log-prefix=PREFIX      prefix for transaction time log file\n"
 		   "                           (default: \"pgbench_log\")\n"
 		   "  --max-tries=NUM          max number of tries to run transaction (default: 1)\n"
+		   "  --continue-on-error      continue processing transactions after a transaction fails\n"
 		   "  --progress-timestamp     use Unix epoch timestamps for progress\n"
 		   "  --random-seed=SEED       set random seed (\"time\", \"rand\", integer)\n"
 		   "  --sampling-rate=NUM      fraction of transactions to log (e.g., 0.01 for 1%%)\n"
@@ -1467,6 +1481,7 @@ initStats(StatsData *sd, pg_time_usec_t start)
 	sd->retried = 0;
 	sd->serialization_failures = 0;
 	sd->deadlock_failures = 0;
+	sd->other_sql_failures = 0;
 	initSimpleStats(&sd->latency);
 	initSimpleStats(&sd->lag);
 }
@@ -1516,6 +1531,9 @@ accumStats(StatsData *stats, bool skipped, double lat, double lag,
 		case ESTATUS_DEADLOCK_ERROR:
 			stats->deadlock_failures++;
 			break;
+		case ESTATUS_OTHER_SQL_ERROR:
+			stats->other_sql_failures++;
+			break;
 		default:
 			/* internal error which should never occur */
 			pg_fatal("unexpected error status: %d", estatus);
@@ -4007,7 +4025,7 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg)
 					if (PQpipelineStatus(st->con) != PQ_PIPELINE_ON)
 						st->state = CSTATE_END_COMMAND;
 				}
-				else if 
(canRetryError(st->estatus))
+				else if (continue_on_error || canRetryError(st->estatus))
 					st->state = CSTATE_ERROR;
 				else
 					st->state = CSTATE_ABORTED;
@@ -4528,7 +4546,8 @@ static int64
 getFailures(const StatsData *stats)
 {
 	return (stats->serialization_failures +
-			stats->deadlock_failures);
+			stats->deadlock_failures +
+			stats->other_sql_failures);
 }
 
 /*
@@ -4548,6 +4567,8 @@ getResultString(bool skipped, EStatus estatus)
 			return "serialization";
 		case ESTATUS_DEADLOCK_ERROR:
 			return "deadlock";
+		case ESTATUS_OTHER_SQL_ERROR:
+			return "other";
 		default:
 			/* internal error which should never occur */
 			pg_fatal("unexpected error status: %d", estatus);
@@ -4603,6 +4624,7 @@ doLog(TState *thread, CState *st,
 		int64		skipped = 0;
 		int64		serialization_failures = 0;
 		int64		deadlock_failures = 0;
+		int64		other_sql_failures = 0;
 		int64		retried = 0;
 		int64		retries = 0;
 
@@ -4643,10 +4665,12 @@ doLog(TState *thread, CState *st,
 		{
 			serialization_failures = agg->serialization_failures;
 			deadlock_failures = agg->deadlock_failures;
+			other_sql_failures = agg->other_sql_failures;
 		}
-		fprintf(logfile, " " INT64_FORMAT " " INT64_FORMAT,
+		fprintf(logfile, " " INT64_FORMAT " " INT64_FORMAT " " INT64_FORMAT,
 				serialization_failures,
-				deadlock_failures);
+				deadlock_failures,
+				other_sql_failures);
 
 		fputc('\n', logfile);
 
@@ -6285,6 +6309,7 @@ printProgressReport(TState *threads, int64 test_start, pg_time_usec_t now,
 		cur.serialization_failures +=
 			threads[i].stats.serialization_failures;
 		cur.deadlock_failures += threads[i].stats.deadlock_failures;
+		cur.other_sql_failures += threads[i].stats.other_sql_failures;
 	}
 
 	/* we count only actually executed transactions */
@@ -6427,7 +6452,8 @@ printResults(StatsData *total,
 
 	/*
 	 * Remaining stats are nonsensical if we failed to execute any xacts due
-	 * to others than serialization or deadlock errors
+	 * to other than serialization or deadlock errors and --continue-on-error
+	 * is not set.
*/ if (total_cnt <= 0) return; @@ -6443,6 +6469,9 @@ printResults(StatsData *total, printf("number of deadlock failures: " INT64_FORMAT " (%.3f%%)\n", total->deadlock_failures, 100.0 * total->deadlock_failures / total_cnt); + printf("number of other failures: " INT64_FORMAT " (%.3f%%)\n", + total->other_sql_failures, + 100.0 * total->other_sql_failures / total_cnt); } /* it can be non-zero only if max_tries is not equal to one */ @@ -6546,6 +6575,10 @@ printResults(StatsData *total, sstats->deadlock_failures, (100.0 * sstats->deadlock_failures / script_total_cnt)); + printf(" - number of other failures: " INT64_FORMAT " (%.3f%%)\n", + sstats->other_sql_failures, + (100.0 * sstats->other_sql_failures / + script_total_cnt)); } /* @@ -6705,6 +6738,7 @@ main(int argc, char **argv) {"verbose-errors", no_argument, NULL, 15}, {"exit-on-abort", no_argument, NULL, 16}, {"debug", no_argument, NULL, 17}, + {"continue-on-error", no_argument, NULL, 18}, {NULL, 0, NULL, 0} }; @@ -7058,6 +7092,10 @@ main(int argc, char **argv) case 17: /* debug */ pg_logging_increase_verbosity(); break; + case 18: /* continue-on-error */ + benchmarking_option_set = true; + continue_on_error = true; + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -7413,6 +7451,7 @@ main(int argc, char **argv) stats.retried += thread->stats.retried; stats.serialization_failures += thread->stats.serialization_failures; stats.deadlock_failures += thread->stats.deadlock_failures; + stats.other_sql_failures += thread->stats.other_sql_failures; latency_late += thread->latency_late; conn_total_duration += thread->conn_duration; diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl index 7dd78940300..afb49b554d0 100644 --- a/src/bin/pgbench/t/001_pgbench_with_server.pl +++ b/src/bin/pgbench/t/001_pgbench_with_server.pl @@ -1813,6 +1813,28 @@ update counter set i = i+1 returning i 
\gset
 
 # Clean up
 $node->safe_psql('postgres', 'DROP TABLE counter;');
 
+# Test --continue-on-error
+$node->safe_psql('postgres',
+	'CREATE TABLE unique_table(i int unique); ' . 'INSERT INTO unique_table VALUES (0);');
+
+$node->pgbench(
+	'-t 10 --continue-on-error --failures-detailed',
+	0,
+	[
+		qr{processed: 0/10\b},
+		qr{other failures: 10\b}
+	],
+	[],
+	'test --continue-on-error',
+	{
+		'002_continue_on_error' => q{
+		insert into unique_table values (0);
+	}
+	});
+
+# Clean up
+$node->safe_psql('postgres', 'DROP TABLE unique_table;');
+
 # done
 $node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts');
 $node->stop;
-- 
2.39.5 (Apple Git-154)