Bug#1036213: apache2: frequent SIGSEGV in mod_http2.so (purge_consumed_buckets)
Hello,
I ran with /usr/lib/apache2/modules/mod_http2_2.0.15.so &
/usr/lib/apache2/modules/mod_proxy_http2_2.0.15.so since yesterday
15:20, and got a SIGSEGV at 21:29:30 : it's not in
purge_consumed_buckets but it's in the h2_proxy stack ...
Here is the bt full of the core:
#0 0x00007f9bb0464efe in ssl_io_filter_output (f=0x7f9ba02088d8, bb=0x7f9ba04cf3b8) at ssl_engine_io.c:1963
bucket = 0x7f9ba04c4108
status = 0
filter_ctx = 0x7f9ba0208880
inctx = <optimized out>
outctx = 0x7f9ba0208900
rblock = <optimized out>
#1 0x00007f9bb0597999 in proxy_pass_brigade (flush=1, bb=<optimized out>, origin=<optimized out>, p_conn=0x7f9ba020a0a0, bucket_alloc=<optimized out>) at h2_proxy_session.c:218
status = <optimized out>
transferred = 9
status = <optimized out>
transferred = <optimized out>
e = <optimized out>
ap__b = <optimized out>
#2 raw_send (ngh2=<optimized out>, data=<optimized out>, length=9, flags=<optimized out>, user_data=0x7f9ba04cf190) at h2_proxy_session.c:244
session = 0x7f9ba04cf190
b = <optimized out>
status = <optimized out>
flush = 1
#3 0x00007f9bb14a11f9 in nghttp2_session_send () from /usr/lib/x86_64-linux-gnu/libnghttp2.so.14
No symbol table info available.
#4 0x00007f9bb059a9b9 in send_loop (session=0x7f9ba04cf190) at h2_proxy_session.c:1517
rv = <optimized out>
#5 h2_proxy_session_process (session=0x7f9ba04cf190) at h2_proxy_session.c:1553
status = <optimized out>
have_written = 0
have_read = 0
run_loop = <optimized out>
#6 0x00007f9bb059db44 in ctx_run (ctx=0x7f9ba06014e0) at mod_proxy_http2.c:258
status = 0
h2_front = <optimized out>
status = <optimized out>
h2_front = <optimized out>
out = <optimized out>
#7 proxy_http2_handler (r=<optimized out>, worker=<optimized out>, conf=<optimized out>, url=<optimized out>, proxyname=<optimized out>, proxyport=<optimized out>) at mod_proxy_http2.c:405
proxy_func = <optimized out>
locurl = 0x7f9ba0601610 "/dns-query?dns=AAABAAABAAAAAAAADHNhZmVicm93c2luZwpnb29nbGVhcGlzA2NvbQAAHAAB"
u = <optimized out>
slen = <optimized out>
is_ssl = <optimized out>
status = 0
ctx = 0x7f9ba06014e0
uri = {scheme = 0x7f9ba0601580 "h2", hostinfo = 0x7f9ba0601588 "doh.geekwu.org:5343", user = 0x0, password = 0x0, hostname = 0x7f9ba06015a0 "doh.geekwu.org", port_str = 0x7f9ba06015b0 "5343", path = 0x7f9ba06015b8 "/dns-query",
query = 0x7f9ba06015c8 "dns=AAABAAABAAAAAAAADHNhZmVicm93c2luZwpnb29nbGVhcGlzA2NvbQAAHAAB", fragment = 0x0, hostent = 0x0, port = 5343, is_initialized = 1, dns_looked_up = 0, dns_resolved = 0}
reconnects = <optimized out>
run_connect = <optimized out>
#8 0x00007f9bb0538943 in proxy_run_scheme_handler (r=r@entry=0x7f9ba05910a0, worker=0x7f9bb15cbbb0, conf=conf@entry=0x7f9bb15cba08,
url=0x7f9ba060134e "h2://doh.geekwu.org:5343/dns-query?dns=AAABAAABAAAAAAAADHNhZmVicm93c2luZwpnb29nbGVhcGlzA2NvbQAAHAAB", proxyhost=proxyhost@entry=0x0, proxyport=proxyport@entry=0) at mod_proxy.c:3437
pHook = <optimized out>
n = 3
rv = -1
#9 0x00007f9bb053a9d2 in proxy_handler (r=0x7f9ba05910a0) at mod_proxy.c:1510
url = 0x7f9ba060134e "h2://doh.geekwu.org:5343/dns-query?dns=AAABAAABAAAAAAAADHNhZmVicm93c2luZwpnb29nbGVhcGlzA2NvbQAAHAAB"
uri = 0x7f9ba060134e "h2://doh.geekwu.org:5343/dns-query?dns=AAABAAABAAAAAAAADHNhZmVicm93c2luZwpnb29nbGVhcGlzA2NvbQAAHAAB"
scheme = 0x7f9ba0601470 "h2"
p = <optimized out>
p2 = 0x7f9ba0208300 "(\200 \240\233\177"
sconf = <optimized out>
conf = 0x7f9bb15cba08
proxies = 0x7f9bb15cbac8
ents = 0x7f9bb05b4688
i = <optimized out>
rc = <optimized out>
access_status = 0
direct_connect = <optimized out>
str = <optimized out>
maxfwd = <optimized out>
balancer = 0x0
worker = 0x7f9bb15cbbb0
attempts = 0
max_attempts = 0
list = <optimized out>
saved_status = <optimized out>
#10 0x0000556306b489d0 in ap_run_handler (r=r@entry=0x7f9ba05910a0) at config.c:169
pHook = <optimized out>
n = 1
rv = -1
#11 0x0000556306b48fc6 in ap_invoke_handler (r=r@entry=0x7f9ba05910a0) at config.c:443
handler = <optimized out>
p = <optimized out>
result = 0
old_handler = 0x7f9bb054930b "proxy-server"
ignore = <optimized out>
#12 0x0000556306b619db in ap_process_async_request (r=r@entry=0x7f9ba05910a0) at http_request.c:452
c = 0x7f9ba06430a0
access_status = 0
#13 0x0000556306b61c1e in ap_process_request (r=r@entry=0x7f9ba05910a0) at http_request.c:487
bb = 0x7f9ba06434b0
b = <optimized out>
c = 0x7f9ba06430a0
rv = <optimized out>
#14 0x00007f9bb116bbe6 in c2_process (c=0x7f9ba06430a0, conn_ctx=0x7f9ba06434b0) at h2_c2.c:723
cs = 0x7f9ba0643658
tenc = <optimized out>
timeout = <optimized out>
req = 0x7f9ba04f7180
r = 0x7f9ba05910a0
cleanup = <optimized out>
req = <optimized out>
cs = <optimized out>
r = <optimized out>
tenc = <optimized out>
timeout = <optimized out>
cleanup = <optimized out>
#15 h2_c2_hook_process (c=0x7f9ba06430a0) at h2_c2.c:840
ctx = 0x7f9ba06434b0
#16 0x0000556306b52730 in ap_run_process_connection (c=c@entry=0x7f9ba06430a0) at connection.c:42
pHook = <optimized out>
n = 0
rv = -1
#17 0x00007f9bb116c4d7 in h2_c2_process (c2=c2@entry=0x7f9ba06430a0, thread=thread@entry=0x7f9bafdfb0b0, worker_id=<optimized out>) at h2_c2.c:658
conn_ctx = 0x7f9ba06434b0
#18 0x00007f9bb1187567 in slot_run (thread=0x7f9bafdfb0b0, wctx=0x7f9bb0511850) at h2_workers.c:299
slot = 0x7f9bb0511850
workers = 0x7f9bb05110a0
c = 0x7f9ba06430a0
rv = <optimized out>
#19 0x00007f9bb1844ea7 in start_thread (arg=<optimized out>) at pthread_create.c:477
ret = <optimized out>
pd = <optimized out>
unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140306500486912, 3999667409471690328, 140722438679950, 140722438679951, 140306500484992, 8396800, -3983747864113949096, -3983768835883903400}, mask_was_saved = 0}}, priv = {pad = {
0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
not_first_call = 0
#20 0x00007f9bb1764a2f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
The request itself seems legit (I can run it without crashing the
server).
The last log entry before 21:29:30 for this query is a few minutes
before the crash :
/var/log/apache2/access.log:37.98.109.173 - - [17/May/2023:21:26:57 +0200] "GET /dns-query?dns=AAABAAABAAAAAAAADHNhZmVicm93c2luZwpnb29nbGVhcGlzA2NvbQAAHAAB HTTP/2.0" 200 103 "-" "-"
At this time, I have 23 ESTABLISHED (no TIME_WAIT) connections from
this IP, with no requests in access log since yesterday ; so I guess
it's a mis-behaving client
Regards,
Le mercredi 17 mai 2023 à 13:24 +0200, Stefan Eissing a écrit :
> Sorry to hear about this. I think the recent change that could be
> relevant here is
> the addition of:
>
> h2_mplx.c#515: c1_purge_streams(m);
>
> as seen in Apache httpd trunk and at https://github.com/icing/mod_h2.
>
> This is intended to assure that streams and their requests are
> destroyed
> in the right order when the connection is shut down.
>
> Connection shutdown can happen at any time during request processing
> and
> this makes it hard to reproduce issues in test cases. We have load
> tests
> with well-behaving clients. Tests with mis-behaving ones are the
> tricky part.
>
> It would be helpful if you could try
> https://github.com/icing/mod_h2/releases/tag/v2.0.15 ;
> on your system, to see how that is faring.
>
> Kind Regards,
> Stefan
>
> > Am 17.05.2023 um 12:24 schrieb Bastien Durel <bastien@durel.org>:
> >
> > Package: apache2
> > Version: 2.4.56-1~deb11u2
> > Severity: important
> >
> > Dear Maintainer,
> >
> > I see many segmentation faults in apache2, for exemple in the last
> > 24h I got:
> >
> > Tue 2023-05-16 13:40:59 CEST 775740 33 33 11 present
> > /usr/sbin/apache2
> > Tue 2023-05-16 13:52:44 CEST 798329 33 33 11 present
> > /usr/sbin/apache2
> > Tue 2023-05-16 16:15:46 CEST 810709 33 33 11 present
> > /usr/sbin/apache2
> > Tue 2023-05-16 16:28:55 CEST 817483 33 33 11 present
> > /usr/sbin/apache2
> > Tue 2023-05-16 17:59:23 CEST 823129 33 33 11 present
> > /usr/sbin/apache2
> > Tue 2023-05-16 18:35:50 CEST 826974 33 33 11 present
> > /usr/sbin/apache2
> > Tue 2023-05-16 18:44:15 CEST 831974 33 33 11 present
> > /usr/sbin/apache2
> > Tue 2023-05-16 18:44:56 CEST 836174 33 33 11 present
> > /usr/sbin/apache2
> > Tue 2023-05-16 18:54:56 CEST 822618 33 33 11 present
> > /usr/sbin/apache2
> > Tue 2023-05-16 21:12:28 CEST 836246 33 33 11 present
> > /usr/sbin/apache2
> > Tue 2023-05-16 21:21:10 CEST 853959 33 33 11 present
> > /usr/sbin/apache2
> > Tue 2023-05-16 22:04:42 CEST 858749 33 33 11 present
> > /usr/sbin/apache2
> > Tue 2023-05-16 23:26:09 CEST 866610 33 33 11 present
> > /usr/sbin/apache2
> > Wed 2023-05-17 00:08:42 CEST 865968 33 33 11 present
> > /usr/sbin/apache2
> > Wed 2023-05-17 00:24:04 CEST 874807 33 33 11 present
> > /usr/sbin/apache2
> > Wed 2023-05-17 00:47:25 CEST 878675 33 33 11 present
> > /usr/sbin/apache2
> > Wed 2023-05-17 01:42:14 CEST 877580 33 33 11 present
> > /usr/sbin/apache2
> > Wed 2023-05-17 09:21:02 CEST 949781 33 33 11 present
> > /usr/sbin/apache2
> > Wed 2023-05-17 09:50:49 CEST 954784 33 33 11 present
> > /usr/sbin/apache2
> >
> > All crashes I looked into are at the same function:
> > purge_consumed_buckets at h2_bucket_beam.c:159
> >
> > Here is the output of the "bt full" command from the core:
> >
> > #0 0x00007ffb03778981 in purge_consumed_buckets
> > (beam=beam@entry=0x7ffae452c0a0) at h2_bucket_beam.c:159
> > b = 0x7ffae45ea108
> > #1 0x00007ffb03778aaf in beam_shutdown
> > (how=APR_SHUTDOWN_READWRITE, beam=<optimized out>) at
> > h2_bucket_beam.c:258
> > No locals.
> > #2 beam_shutdown (how=APR_SHUTDOWN_READWRITE, beam=0x7ffae452c0a0)
> > at h2_bucket_beam.c:242
> > No locals.
> > #3 beam_cleanup (data=0x7ffae452c0a0) at h2_bucket_beam.c:265
> > beam = 0x7ffae452c0a0
> > #4 0x00007ffb03e6780e in run_cleanups (cref=0x7ffae452c098) at
> > ./memory/unix/apr_pools.c:2629
> > c = <optimized out>
> > c = <optimized out>
> > #5 apr_pool_destroy (pool=0x7ffae452c028) at
> > ./memory/unix/apr_pools.c:987
> > active = <optimized out>
> > allocator = <optimized out>
> > #6 0x00007ffb03e6782d in apr_pool_destroy (pool=0x7ffae4530028) at
> > ./memory/unix/apr_pools.c:997
> > active = <optimized out>
> > allocator = <optimized out>
> > #7 0x00007ffb03e6782d in apr_pool_destroy (pool=0x7ffae4551028) at
> > ./memory/unix/apr_pools.c:997
> > active = <optimized out>
> > allocator = <optimized out>
> > #8 0x00007ffb03e6782d in apr_pool_destroy (pool=0x7ffae45a1028) at
> > ./memory/unix/apr_pools.c:997
> > active = <optimized out>
> > allocator = <optimized out>
> > #9 0x00007ffb03e6782d in apr_pool_destroy (pool=0x7ffae4606028) at
> > ./memory/unix/apr_pools.c:997
> > active = <optimized out>
> > allocator = <optimized out>
> > #10 0x00007ffb037914c5 in h2_session_pre_close
> > (session=0x7ffae46060a0, async=<optimized out>) at
> > h2_session.c:1988
> > status = 0
> > #11 0x00007ffb0377b745 in h2_c1_pre_close (ctx=<optimized out>,
> > c=<optimized out>) at h2_c1.c:180
> > status = <optimized out>
> > conn_ctx = <optimized out>
> > #12 0x000056438478c9b0 in ap_run_pre_close_connection
> > (c=c@entry=0x7ffae4614360) at connection.c:44
> > pHook = <optimized out>
> > n = 0
> > rv = 0
> > #13 0x000056438478cade in ap_prep_lingering_close
> > (c=0x7ffae4614360) at connection.c:101
> > No locals.
> > #14 ap_start_lingering_close (c=0x7ffae4614360) at connection.c:127
> > csd = 0x7ffae46140b0
> > #15 0x00007ffb03b08abe in process_lingering_close
> > (cs=0x7ffae46142b0) at event.c:1500
> > csd = 0x7ffae46140b0
> > dummybuf =
> > "\027\003\003\000\023\067\020\251\027\003\215Re\345\310{\f8\312X\33
> > 2N\310\375", '\000' <repeats 17385 times>...
> > nbytes = 0
> > rv = <optimized out>
> > q = <optimized out>
> > #16 0x00007ffb03b0a512 in process_socket
> > (thd=thd@entry=0x7ffb037345c8, p=<optimized out>, sock=<optimized
> > out>, cs=<optimized out>, my_child_num=my_child_num@entry=3,
> > my_thread_num=my_thread_num@entry=16) at event.c:1238
> > c = <optimized out>
> > conn_id = <optimized out>
> > clogging = <optimized out>
> > rv = <optimized out>
> > rc = <optimized out>
> > #17 0x00007ffb03b0b125 in worker_thread (thd=0x7ffb037345c8,
> > dummy=<optimized out>) at event.c:2199
> > csd = 0x7ffae46140b0
> > cs = 0x7ffae46142b0
> > te = 0x0
> > ptrans = 0x0
> > ti = <optimized out>
> > process_slot = -855667096
> > thread_slot = 16
> > rv = <optimized out>
> > is_idle = 0
> > #18 0x00007ffb03e2aea7 in start_thread (arg=<optimized out>) at
> > pthread_create.c:477
> > ret = <optimized out>
> > pd = <optimized out>
> > unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140715157853952,
> > -1517716079030320448, 140715846122926, 140715846122927,
> > 140715157852032, 8396800, 1520638580441989824,
> > 1520521782042673856}, mask_was_saved = 0}}, priv = {pad = {0x0,
> > 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0,
> > canceltype = 0}}}
> > not_first_call = 0
> > #19 0x00007ffb03d4aa2f in clone () at
> > ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
> >
> > It may - or not - be related with the fact I got hundreds (had been
> > hundreds of thousands) of persistent connections from random IR
> > hosts to my https server, I guess it can put more stress on h2
> > cleanup functions that the normal (quite low) usage I had before.
> >
> > I also seen that this function was corrected a few days ago :
> > https://github.com/icing/mod_h2/commit/ff00b3fdff368b225e70c61ca0fefdbd3d83f6de
> > I don't know enough of apache2's codebase to see if it may has an
> > impact, but it seems unlikely to me; as the AP_BUCKET_IS_EOR()
> > check seems related to a newly introduced member
> >
> > Regards,
> >
> > -- Package-specific info:
> >
> > -- System Information:
> > Debian Release: 11.7
> > APT prefers stable-updates
> > APT policy: (500, 'stable-updates'), (500, 'stable-security'),
> > (500, 'stable-debug'), (500, 'oldstable-updates'), (500,
> > 'oldstable-debug'), (500, 'stable'), (500, 'oldstable')
> > Architecture: amd64 (x86_64)
> >
> > Kernel: Linux 5.10.0-23-amd64 (SMP w/8 CPU threads)
> > Locale: LANG=fr_FR.UTF-8, LC_CTYPE=fr_FR.UTF-8 (charmap=UTF-8),
> > LANGUAGE not set
> > Shell: /bin/sh linked to /bin/dash
> > Init: systemd (via /run/systemd/system)
> >
> > Versions of packages apache2 depends on:
> > ii apache2-bin 2.4.56-1~deb11u2
> > ii apache2-data 2.4.56-1~deb11u2
> > ii apache2-utils 2.4.56-1~deb11u2
> > ii dpkg 1.20.12
> > ii init-system-helpers 1.60
> > ii lsb-base 11.1.0
> > ii mime-support 3.66
> > ii perl 5.32.1-4+deb11u2
> > ii procps 2:3.3.17-5
> >
> > Versions of packages apache2 recommends:
> > ii ssl-cert 1.1.0+nmu1
> >
> > Versions of packages apache2 suggests:
> > pn apache2-doc <none>
> > ii apache2-suexec-pristine 2.4.56-1~deb11u2
> > ii chimera2 [www-browser] 2.0a19-8+b2
> > ii dillo [www-browser] 3.0.5-7
> > ii links2 [www-browser] 2.21-1+b1
> > ii lynx [www-browser] 2.9.0dev.6-3~deb11u1
> >
> > Versions of packages apache2-bin depends on:
> > ii libapr1 1.7.0-6+deb11u2
> > ii libaprutil1 1.6.1-5+deb11u1
> > ii libaprutil1-dbd-sqlite3 1.6.1-5+deb11u1
> > ii libaprutil1-ldap 1.6.1-5+deb11u1
> > ii libbrotli1 1.0.9-2+b2
> > ii libc6 2.31-13+deb11u6
> > ii libcrypt1 1:4.4.18-4
> > ii libcurl4 7.74.0-1.3+deb11u7
> > ii libjansson4 2.13.1-1.1
> > ii libldap-2.4-2 2.4.57+dfsg-3+deb11u1
> > ii liblua5.3-0 5.3.3-1.1+b1
> > ii libnghttp2-14 1.43.0-1
> > ii libpcre3 2:8.44-
> > 2+0~20210301.9+debian11~1.gbpa278ad
> > ii libssl1.1 1.1.1n-0+deb11u4
> > ii libxml2 2.9.14+dfsg-
> > 0.1+0~20230421.14+debian11~1.gbpf14485
> > ii perl 5.32.1-4+deb11u2
> > ii zlib1g 1:1.2.11.dfsg-2+deb11u2
> >
> > Versions of packages apache2-bin suggests:
> > pn apache2-doc <none>
> > ii apache2-suexec-pristine 2.4.56-1~deb11u2
> > ii chimera2 [www-browser] 2.0a19-8+b2
> > ii dillo [www-browser] 3.0.5-7
> > ii links2 [www-browser] 2.21-1+b1
> > ii lynx [www-browser] 2.9.0dev.6-3~deb11u1
> >
> > Versions of packages apache2 is related to:
> > ii apache2 2.4.56-1~deb11u2
> > ii apache2-bin 2.4.56-1~deb11u2
> >
> > -- Configuration Files:
> > /etc/apache2/apache2.conf changed [not included]
> > /etc/apache2/conf-available/security.conf changed [not included]
> > /etc/apache2/envvars changed [not included]
> > /etc/apache2/mods-available/cern_meta.load [Errno 2] Aucun fichier
> > ou dossier de ce type: '/etc/apache2/mods-available/cern_meta.load'
> > /etc/apache2/mods-available/ident.load [Errno 2] Aucun fichier ou
> > dossier de ce type: '/etc/apache2/mods-available/ident.load'
> > /etc/apache2/mods-available/imagemap.load [Errno 2] Aucun fichier
> > ou dossier de ce type: '/etc/apache2/mods-available/imagemap.load'
> > /etc/apache2/mods-available/proxy_hcheck.load [Errno 2] Aucun
> > fichier ou dossier de ce type: '/etc/apache2/mods-
> > available/proxy_hcheck.load'
> > /etc/apache2/mods-available/proxy_html.load changed [not included]
> > /etc/apache2/mods-available/proxy_http2.load changed [not included]
> > /etc/apache2/mods-available/userdir.conf changed [not included]
> > /etc/apache2/ports.conf changed [not included]
> > /etc/apache2/sites-available/000-default.conf changed [not
> > included]
> > /etc/apache2/sites-available/default-ssl.conf changed [not
> > included]
> > /etc/cron.daily/apache2 changed [not included]
> > /etc/default/apache-htcacheclean [Errno 2] Aucun fichier ou dossier
> > de ce type: '/etc/default/apache-htcacheclean'
> > /etc/init.d/apache2 changed [not included]
> > /etc/logrotate.d/apache2 changed [not included]
> >
> > -- no debconf information
> >
>
--
Bastien
Reply to: