Re: Protect syscache from bloating with negative cache entries

From: Kyotaro Horiguchi <horikyota(dot)ntt(at)gmail(dot)com>
To: hlinnaka(at)iki(dot)fi
Cc: robertmhaas(at)gmail(dot)com, ideriha(dot)takeshi(at)jp(dot)fujitsu(dot)com, tomas(dot)vondra(at)2ndquadrant(dot)com, tgl(at)sss(dot)pgh(dot)pa(dot)us, andres(at)anarazel(dot)de, tsunakawa(dot)takay(at)jp(dot)fujitsu(dot)com, alvherre(at)2ndquadrant(dot)com, bruce(at)momjian(dot)us, pgsql-hackers(at)lists(dot)postgresql(dot)org, michael(dot)paquier(at)gmail(dot)com, david(at)pgmasters(dot)net, craig(at)2ndquadrant(dot)com
Subject: Re: Protect syscache from bloating with negative cache entries
Date: 2020-11-09 09:34:47
Message-ID: 20201109.183447.202900641623450560.horikyota.ntt@gmail.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

At Fri, 6 Nov 2020 10:42:15 +0200, Heikki Linnakangas <hlinnaka(at)iki(dot)fi> wrote in
> Do you need the "ntaccess == 2" test? You could always increment the
> counter, and in the code that uses ntaccess to decide what to evict,
> treat all values >= 2 the same.
>
> Need to handle integer overflow somehow. Or maybe not: integer
> overflow is so infrequent that even if a hot syscache entry gets
> evicted prematurely because its ntaccess count wrapped around to 0, it
> will happen so rarely that it won't make any difference in practice.

That relaxing simplifies the code significantly, but a significant
degradation by about 5% still exists.

(SearchCatCacheInternal())
+ ct->naccess++;
!+ ct->lastaccess = catcacheclock;

If I removed the second line above, the degradation disappears
(-0.7%). However, I don't find the corresponding numbers in the output
of perf. The sum of the numbers for the removed instructions is (0.02
+ 0.28 = 0.3%). I don't think the degradation as the whole doesn't
always reflect to the instruction level profiling, but I'm stuck here,
anyway.

% samples
master p2 patched (p2 = patched - "ct->lastaccess = catcacheclock)
=============================================================================
0.47 | 0.27 | 0.17 | mov %rbx,0x8(%rbp)
| | | SearchCatCacheInternal():
| | | ct->naccess++;
| | | ct->lastaccess = catcacheclock;
----- |----- | 0.02 |10f: mov catcacheclock,%rax
| | | ct->naccess++;
----- | 0.96 | 1.00 | addl $0x1,0x14(%rbx)
| | | return NULL;
----- | 0.11 | 0.16 | xor %ebp,%ebp
| | | if (!ct->negative)
0.27 | 0.30 | 0.03 | cmpb $0x0,0x21(%rbx)
| | | ct->lastaccess = catcacheclock;
----- | ---- | 0.28 | mov %rax,0x18(%rbx)
| | | if (!ct->negative)
0.34 | 0.08 | 0.59 | ↓ jne 149

For your information, the same table for a bit wider range follows.

% samples
master p2 patched (p2 = patched - "ct->lastaccess = catcacheclock)
=============================================================================
| | | dlist_foreach(iter, bucket)
6.91 | 7.06 | 5.89 | mov 0x8(%rbp),%rbx
0.78 | 0.73 | 0.81 | test %rbx,%rbx
| | | ↓ je 160
| | | cmp %rbx,%rbp
0.46 | 0.52 | 0.39 | ↓ jne 9d
| | | ↓ jmpq 160
| | | nop
5.68 | 5.54 | 6.03 | 90: mov 0x8(%rbx),%rbx
1.44 | 1.42 | 1.43 | cmp %rbx,%rbp
| | | ↓ je 160
| | | {
| | | ct = dlist_container(CatCTup, cache_elem, iter.cur);
| | |
| | | if (ct->dead)
30.36 |30.97 | 31.48 | 9d: cmpb $0x0,0x20(%rbx)
2.63 | 2.60 | 2.69 | ↑ jne 90
| | | continue; /* ignore dead entries */
| | |
| | | if (ct->hash_value != hashValue)
1.41 | 1.37 | 1.35 | cmp -0x24(%rbx),%edx
3.19 | 2.97 | 2.87 | ↑ jne 90
7.17 | 5.53 | 6.89 | mov %r13,%rsi
0.02 | 0.04 | 0.04 | xor %r12d,%r12d
3.00 | 2.98 | 2.95 | ↓ jmp b5
0.15 | 0.61 | 0.20 | b0: mov 0x10(%rsp,%r12,1),%rsi
6.58 | 5.04 | 5.95 | b5: mov %ecx,0xc(%rsp)
| | | CatalogCacheCompareTuple():
| | | if (!(cc_fastequal[i]) (cachekeys[i], searchkeys[i]))
1.51 | 0.92 | 1.66 | mov -0x20(%rbx,%r12,1),%rdi
0.54 | 1.64 | 0.58 | mov %edx,0x8(%rsp)
3.78 | 3.11 | 3.86 | → callq *0x38(%r14,%r12,1)
0.43 | 2.30 | 0.34 | mov 0x8(%rsp),%edx
0.20 | 0.94 | 0.25 | mov 0xc(%rsp),%ecx
0.44 | 0.41 | 0.44 | test %al,%al
| | | ↑ je 90
| | | for (i = 0; i < nkeys; i++)
2.28 | 1.07 | 2.26 | add $0x8,%r12
0.08 | 0.23 | 0.07 | cmp $0x18,%r12
0.11 | 0.64 | 0.10 | ↑ jne b0
| | | dlist_move_head():
| | | */
| | | static inline void
| | | dlist_move_head(dlist_head *head, dlist_node *node)
| | | {
| | | /* fast path if it's already at the head */
| | | if (head->head.next == node)
0.08 | 0.61 | 0.04 | cmp 0x8(%rbp),%rbx
0.02 | 0.10 | 0.00 | ↓ je 10f
| | | return;
| | |
| | | dlist_delete(node);
0.01 | 0.20 | 0.06 | mov 0x8(%rbx),%rax
| | | dlist_delete():
| | | node->prev->next = node->next;
0.75 | 0.13 | 0.72 | mov (%rbx),%rdx
2.89 | 3.42 | 2.22 | mov %rax,0x8(%rdx)
| | | node->next->prev = node->prev;
0.01 | 0.09 | 0.00 | mov (%rbx),%rdx
0.04 | 0.62 | 0.58 | mov %rdx,(%rax)
| | | dlist_push_head():
| | | if (head->head.next == NULL) /* convert NULL header to circular */
0.31 | 0.08 | 0.28 | mov 0x8(%rbp),%rax
0.55 | 0.44 | 0.28 | test %rax,%rax
| | | ↓ je 180
| | | node->next = head->head.next;
0.00 | 0.08 | 0.06 |101: mov %rax,0x8(%rbx)
| | | node->prev = &head->head;
0.17 | 0.73 | 0.37 | mov %rbp,(%rbx)
| | | node->next->prev = node;
0.34 | 0.08 | 1.13 | mov %rbx,(%rax)
| | | head->head.next = node;
0.47 | 0.27 | 0.17 | mov %rbx,0x8(%rbp)
| | | SearchCatCacheInternal():
| | | ct->naccess++;
| | | ct->lastaccess = catcacheclock;
----- |----- | 0.02 |10f: mov catcacheclock,%rax
| | | ct->naccess++;
----- | 0.96 | 1.00 | addl $0x1,0x14(%rbx)
| | | return NULL;
----- | 0.11 | 0.16 | xor %ebp,%ebp
| | | if (!ct->negative)
0.27 | 0.30 | 0.03 | cmpb $0x0,0x21(%rbx)
| | | ct->lastaccess = catcacheclock;
----- | ---- | 0.28 | mov %rax,0x18(%rbx)
| | | if (!ct->negative)
0.34 | 0.08 | 0.59 | ↓ jne 149

--
Kyotaro Horiguchi
NTT Open Source Software Center

Attachment Content-Type Size
v5-0001-CatCache-expiration-feature.patch text/x-patch 9.0 KB

In response to

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Magnus Hagander 2020-11-09 09:39:21 Re: Useless string ouput in error message
Previous Message Dilip Kumar 2020-11-09 09:31:10 Re: logical streaming of xacts via test_decoding is broken