Bug#1036213: apache2: frequent SIGSEGV in mod_http2.so (purge_consumed_buckets)
Did you have an warning message like "AH03516: unexpected NN streams in hold" at that time in out error log?
> Am 18.05.2023 um 11:04 schrieb Bastien Durel <bastien@durel.org>:
>
> Hello,
>
> I ran with /usr/lib/apache2/modules/mod_http2_2.0.15.so &
> /usr/lib/apache2/modules/mod_proxy_http2_2.0.15.so since yesterday
> 15:20, and got a SIGSEGV at 21:29:30 : it's not in
> purge_consumed_buckets but it's in the h2_proxy stack ...
>
> Here is the bt full of the core:
>
> #0 0x00007f9bb0464efe in ssl_io_filter_output (f=0x7f9ba02088d8, bb=0x7f9ba04cf3b8) at ssl_engine_io.c:1963
> bucket = 0x7f9ba04c4108
> status = 0
> filter_ctx = 0x7f9ba0208880
> inctx = <optimized out>
> outctx = 0x7f9ba0208900
> rblock = <optimized out>
> #1 0x00007f9bb0597999 in proxy_pass_brigade (flush=1, bb=<optimized out>, origin=<optimized out>, p_conn=0x7f9ba020a0a0, bucket_alloc=<optimized out>) at h2_proxy_session.c:218
> status = <optimized out>
> transferred = 9
> status = <optimized out>
> transferred = <optimized out>
> e = <optimized out>
> ap__b = <optimized out>
> #2 raw_send (ngh2=<optimized out>, data=<optimized out>, length=9, flags=<optimized out>, user_data=0x7f9ba04cf190) at h2_proxy_session.c:244
> session = 0x7f9ba04cf190
> b = <optimized out>
> status = <optimized out>
> flush = 1
> #3 0x00007f9bb14a11f9 in nghttp2_session_send () from /usr/lib/x86_64-linux-gnu/libnghttp2.so.14
> No symbol table info available.
> #4 0x00007f9bb059a9b9 in send_loop (session=0x7f9ba04cf190) at h2_proxy_session.c:1517
> rv = <optimized out>
> #5 h2_proxy_session_process (session=0x7f9ba04cf190) at h2_proxy_session.c:1553
> status = <optimized out>
> have_written = 0
> have_read = 0
> run_loop = <optimized out>
> #6 0x00007f9bb059db44 in ctx_run (ctx=0x7f9ba06014e0) at mod_proxy_http2.c:258
> status = 0
> h2_front = <optimized out>
> status = <optimized out>
> h2_front = <optimized out>
> out = <optimized out>
> #7 proxy_http2_handler (r=<optimized out>, worker=<optimized out>, conf=<optimized out>, url=<optimized out>, proxyname=<optimized out>, proxyport=<optimized out>) at mod_proxy_http2.c:405
> proxy_func = <optimized out>
> locurl = 0x7f9ba0601610 "/dns-query?dns=AAABAAABAAAAAAAADHNhZmVicm93c2luZwpnb29nbGVhcGlzA2NvbQAAHAAB"
> u = <optimized out>
> slen = <optimized out>
> is_ssl = <optimized out>
> status = 0
> ctx = 0x7f9ba06014e0
> uri = {scheme = 0x7f9ba0601580 "h2", hostinfo = 0x7f9ba0601588 "doh.geekwu.org:5343", user = 0x0, password = 0x0, hostname = 0x7f9ba06015a0 "doh.geekwu.org", port_str = 0x7f9ba06015b0 "5343", path = 0x7f9ba06015b8 "/dns-query",
> query = 0x7f9ba06015c8 "dns=AAABAAABAAAAAAAADHNhZmVicm93c2luZwpnb29nbGVhcGlzA2NvbQAAHAAB", fragment = 0x0, hostent = 0x0, port = 5343, is_initialized = 1, dns_looked_up = 0, dns_resolved = 0}
> reconnects = <optimized out>
> run_connect = <optimized out>
> #8 0x00007f9bb0538943 in proxy_run_scheme_handler (r=r@entry=0x7f9ba05910a0, worker=0x7f9bb15cbbb0, conf=conf@entry=0x7f9bb15cba08,
> url=0x7f9ba060134e "h2://doh.geekwu.org:5343/dns-query?dns=AAABAAABAAAAAAAADHNhZmVicm93c2luZwpnb29nbGVhcGlzA2NvbQAAHAAB", proxyhost=proxyhost@entry=0x0, proxyport=proxyport@entry=0) at mod_proxy.c:3437
> pHook = <optimized out>
> n = 3
> rv = -1
> #9 0x00007f9bb053a9d2 in proxy_handler (r=0x7f9ba05910a0) at mod_proxy.c:1510
> url = 0x7f9ba060134e "h2://doh.geekwu.org:5343/dns-query?dns=AAABAAABAAAAAAAADHNhZmVicm93c2luZwpnb29nbGVhcGlzA2NvbQAAHAAB"
> uri = 0x7f9ba060134e "h2://doh.geekwu.org:5343/dns-query?dns=AAABAAABAAAAAAAADHNhZmVicm93c2luZwpnb29nbGVhcGlzA2NvbQAAHAAB"
> scheme = 0x7f9ba0601470 "h2"
> p = <optimized out>
> p2 = 0x7f9ba0208300 "(\200 \240\233\177"
> sconf = <optimized out>
> conf = 0x7f9bb15cba08
> proxies = 0x7f9bb15cbac8
> ents = 0x7f9bb05b4688
> i = <optimized out>
> rc = <optimized out>
> access_status = 0
> direct_connect = <optimized out>
> str = <optimized out>
> maxfwd = <optimized out>
> balancer = 0x0
> worker = 0x7f9bb15cbbb0
> attempts = 0
> max_attempts = 0
> list = <optimized out>
> saved_status = <optimized out>
> #10 0x0000556306b489d0 in ap_run_handler (r=r@entry=0x7f9ba05910a0) at config.c:169
> pHook = <optimized out>
> n = 1
> rv = -1
> #11 0x0000556306b48fc6 in ap_invoke_handler (r=r@entry=0x7f9ba05910a0) at config.c:443
> handler = <optimized out>
> p = <optimized out>
> result = 0
> old_handler = 0x7f9bb054930b "proxy-server"
> ignore = <optimized out>
> #12 0x0000556306b619db in ap_process_async_request (r=r@entry=0x7f9ba05910a0) at http_request.c:452
> c = 0x7f9ba06430a0
> access_status = 0
> #13 0x0000556306b61c1e in ap_process_request (r=r@entry=0x7f9ba05910a0) at http_request.c:487
> bb = 0x7f9ba06434b0
> b = <optimized out>
> c = 0x7f9ba06430a0
> rv = <optimized out>
> #14 0x00007f9bb116bbe6 in c2_process (c=0x7f9ba06430a0, conn_ctx=0x7f9ba06434b0) at h2_c2.c:723
> cs = 0x7f9ba0643658
> tenc = <optimized out>
> timeout = <optimized out>
> req = 0x7f9ba04f7180
> r = 0x7f9ba05910a0
> cleanup = <optimized out>
> req = <optimized out>
> cs = <optimized out>
> r = <optimized out>
> tenc = <optimized out>
> timeout = <optimized out>
> cleanup = <optimized out>
> #15 h2_c2_hook_process (c=0x7f9ba06430a0) at h2_c2.c:840
> ctx = 0x7f9ba06434b0
> #16 0x0000556306b52730 in ap_run_process_connection (c=c@entry=0x7f9ba06430a0) at connection.c:42
> pHook = <optimized out>
> n = 0
> rv = -1
> #17 0x00007f9bb116c4d7 in h2_c2_process (c2=c2@entry=0x7f9ba06430a0, thread=thread@entry=0x7f9bafdfb0b0, worker_id=<optimized out>) at h2_c2.c:658
> conn_ctx = 0x7f9ba06434b0
> #18 0x00007f9bb1187567 in slot_run (thread=0x7f9bafdfb0b0, wctx=0x7f9bb0511850) at h2_workers.c:299
> slot = 0x7f9bb0511850
> workers = 0x7f9bb05110a0
> c = 0x7f9ba06430a0
> rv = <optimized out>
> #19 0x00007f9bb1844ea7 in start_thread (arg=<optimized out>) at pthread_create.c:477
> ret = <optimized out>
> pd = <optimized out>
> unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140306500486912, 3999667409471690328, 140722438679950, 140722438679951, 140306500484992, 8396800, -3983747864113949096, -3983768835883903400}, mask_was_saved = 0}}, priv = {pad = {
> 0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}}
> not_first_call = 0
> #20 0x00007f9bb1764a2f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
>
>
> The request itself seems legit (I can run it without crashing the
> server).
>
> The last log entry before 21:29:30 for this query is a few minutes
> before the crash :
>
> /var/log/apache2/access.log:37.98.109.173 - - [17/May/2023:21:26:57 +0200] "GET /dns-query?dns=AAABAAABAAAAAAAADHNhZmVicm93c2luZwpnb29nbGVhcGlzA2NvbQAAHAAB HTTP/2.0" 200 103 "-" "-"
>
> At this time, I have 23 ESTABLISHED (no TIME_WAIT) connections from
> this IP, with no requests in access log since yesterday ; so I guess
> it's a mis-behaving client
>
> Regards,
>
> Le mercredi 17 mai 2023 à 13:24 +0200, Stefan Eissing a écrit :
>> Sorry to hear about this. I think the recent change that could be
>> relevant here is
>> the addition of:
>>
>> h2_mplx.c#515: c1_purge_streams(m);
>>
>> as seen in Apache httpd trunk and at https://github.com/icing/mod_h2.
>>
>> This is intended to assure that streams and their requests are
>> destroyed
>> in the right order when the connection is shut down.
>>
>> Connection shutdown can happen at any time during request processing
>> and
>> this makes it hard to reproduce issues in test cases. We have load
>> tests
>> with well-behaving clients. Tests with mis-behaving ones are the
>> tricky part.
>>
>> It would be helpful if you could try
>> https://github.com/icing/mod_h2/releases/tag/v2.0.15
>> on your system, to see how that is faring.
>>
>> Kind Regards,
>> Stefan
>>
>>> Am 17.05.2023 um 12:24 schrieb Bastien Durel <bastien@durel.org>:
>>>
>>> Package: apache2
>>> Version: 2.4.56-1~deb11u2
>>> Severity: important
>>>
>>> Dear Maintainer,
>>>
>>> I see many segmentation faults in apache2, for exemple in the last
>>> 24h I got:
>>>
>>> Tue 2023-05-16 13:40:59 CEST 775740 33 33 11 present
>>> /usr/sbin/apache2
>>> Tue 2023-05-16 13:52:44 CEST 798329 33 33 11 present
>>> /usr/sbin/apache2
>>> Tue 2023-05-16 16:15:46 CEST 810709 33 33 11 present
>>> /usr/sbin/apache2
>>> Tue 2023-05-16 16:28:55 CEST 817483 33 33 11 present
>>> /usr/sbin/apache2
>>> Tue 2023-05-16 17:59:23 CEST 823129 33 33 11 present
>>> /usr/sbin/apache2
>>> Tue 2023-05-16 18:35:50 CEST 826974 33 33 11 present
>>> /usr/sbin/apache2
>>> Tue 2023-05-16 18:44:15 CEST 831974 33 33 11 present
>>> /usr/sbin/apache2
>>> Tue 2023-05-16 18:44:56 CEST 836174 33 33 11 present
>>> /usr/sbin/apache2
>>> Tue 2023-05-16 18:54:56 CEST 822618 33 33 11 present
>>> /usr/sbin/apache2
>>> Tue 2023-05-16 21:12:28 CEST 836246 33 33 11 present
>>> /usr/sbin/apache2
>>> Tue 2023-05-16 21:21:10 CEST 853959 33 33 11 present
>>> /usr/sbin/apache2
>>> Tue 2023-05-16 22:04:42 CEST 858749 33 33 11 present
>>> /usr/sbin/apache2
>>> Tue 2023-05-16 23:26:09 CEST 866610 33 33 11 present
>>> /usr/sbin/apache2
>>> Wed 2023-05-17 00:08:42 CEST 865968 33 33 11 present
>>> /usr/sbin/apache2
>>> Wed 2023-05-17 00:24:04 CEST 874807 33 33 11 present
>>> /usr/sbin/apache2
>>> Wed 2023-05-17 00:47:25 CEST 878675 33 33 11 present
>>> /usr/sbin/apache2
>>> Wed 2023-05-17 01:42:14 CEST 877580 33 33 11 present
>>> /usr/sbin/apache2
>>> Wed 2023-05-17 09:21:02 CEST 949781 33 33 11 present
>>> /usr/sbin/apache2
>>> Wed 2023-05-17 09:50:49 CEST 954784 33 33 11 present
>>> /usr/sbin/apache2
>>>
>>> All crashes I looked into are at the same function:
>>> purge_consumed_buckets at h2_bucket_beam.c:159
>>>
>>> Here is the output of the "bt full" command from the core:
>>>
>>> #0 0x00007ffb03778981 in purge_consumed_buckets
>>> (beam=beam@entry=0x7ffae452c0a0) at h2_bucket_beam.c:159
>>> b = 0x7ffae45ea108
>>> #1 0x00007ffb03778aaf in beam_shutdown
>>> (how=APR_SHUTDOWN_READWRITE, beam=<optimized out>) at
>>> h2_bucket_beam.c:258
>>> No locals.
>>> #2 beam_shutdown (how=APR_SHUTDOWN_READWRITE, beam=0x7ffae452c0a0)
>>> at h2_bucket_beam.c:242
>>> No locals.
>>> #3 beam_cleanup (data=0x7ffae452c0a0) at h2_bucket_beam.c:265
>>> beam = 0x7ffae452c0a0
>>> #4 0x00007ffb03e6780e in run_cleanups (cref=0x7ffae452c098) at
>>> ./memory/unix/apr_pools.c:2629
>>> c = <optimized out>
>>> c = <optimized out>
>>> #5 apr_pool_destroy (pool=0x7ffae452c028) at
>>> ./memory/unix/apr_pools.c:987
>>> active = <optimized out>
>>> allocator = <optimized out>
>>> #6 0x00007ffb03e6782d in apr_pool_destroy (pool=0x7ffae4530028) at
>>> ./memory/unix/apr_pools.c:997
>>> active = <optimized out>
>>> allocator = <optimized out>
>>> #7 0x00007ffb03e6782d in apr_pool_destroy (pool=0x7ffae4551028) at
>>> ./memory/unix/apr_pools.c:997
>>> active = <optimized out>
>>> allocator = <optimized out>
>>> #8 0x00007ffb03e6782d in apr_pool_destroy (pool=0x7ffae45a1028) at
>>> ./memory/unix/apr_pools.c:997
>>> active = <optimized out>
>>> allocator = <optimized out>
>>> #9 0x00007ffb03e6782d in apr_pool_destroy (pool=0x7ffae4606028) at
>>> ./memory/unix/apr_pools.c:997
>>> active = <optimized out>
>>> allocator = <optimized out>
>>> #10 0x00007ffb037914c5 in h2_session_pre_close
>>> (session=0x7ffae46060a0, async=<optimized out>) at
>>> h2_session.c:1988
>>> status = 0
>>> #11 0x00007ffb0377b745 in h2_c1_pre_close (ctx=<optimized out>,
>>> c=<optimized out>) at h2_c1.c:180
>>> status = <optimized out>
>>> conn_ctx = <optimized out>
>>> #12 0x000056438478c9b0 in ap_run_pre_close_connection
>>> (c=c@entry=0x7ffae4614360) at connection.c:44
>>> pHook = <optimized out>
>>> n = 0
>>> rv = 0
>>> #13 0x000056438478cade in ap_prep_lingering_close
>>> (c=0x7ffae4614360) at connection.c:101
>>> No locals.
>>> #14 ap_start_lingering_close (c=0x7ffae4614360) at connection.c:127
>>> csd = 0x7ffae46140b0
>>> #15 0x00007ffb03b08abe in process_lingering_close
>>> (cs=0x7ffae46142b0) at event.c:1500
>>> csd = 0x7ffae46140b0
>>> dummybuf =
>>> "\027\003\003\000\023\067\020\251\027\003\215Re\345\310{\f8\312X\33
>>> 2N\310\375", '\000' <repeats 17385 times>...
>>> nbytes = 0
>>> rv = <optimized out>
>>> q = <optimized out>
>>> #16 0x00007ffb03b0a512 in process_socket
>>> (thd=thd@entry=0x7ffb037345c8, p=<optimized out>, sock=<optimized
>>> out>, cs=<optimized out>, my_child_num=my_child_num@entry=3,
>>> my_thread_num=my_thread_num@entry=16) at event.c:1238
>>> c = <optimized out>
>>> conn_id = <optimized out>
>>> clogging = <optimized out>
>>> rv = <optimized out>
>>> rc = <optimized out>
>>> #17 0x00007ffb03b0b125 in worker_thread (thd=0x7ffb037345c8,
>>> dummy=<optimized out>) at event.c:2199
>>> csd = 0x7ffae46140b0
>>> cs = 0x7ffae46142b0
>>> te = 0x0
>>> ptrans = 0x0
>>> ti = <optimized out>
>>> process_slot = -855667096
>>> thread_slot = 16
>>> rv = <optimized out>
>>> is_idle = 0
>>> #18 0x00007ffb03e2aea7 in start_thread (arg=<optimized out>) at
>>> pthread_create.c:477
>>> ret = <optimized out>
>>> pd = <optimized out>
>>> unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140715157853952,
>>> -1517716079030320448, 140715846122926, 140715846122927,
>>> 140715157852032, 8396800, 1520638580441989824,
>>> 1520521782042673856}, mask_was_saved = 0}}, priv = {pad = {0x0,
>>> 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0,
>>> canceltype = 0}}}
>>> not_first_call = 0
>>> #19 0x00007ffb03d4aa2f in clone () at
>>> ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
>>>
>>> It may - or not - be related with the fact I got hundreds (had been
>>> hundreds of thousands) of persistent connections from random IR
>>> hosts to my https server, I guess it can put more stress on h2
>>> cleanup functions that the normal (quite low) usage I had before.
>>>
>>> I also seen that this function was corrected a few days ago :
>>> https://github.com/icing/mod_h2/commit/ff00b3fdff368b225e70c61ca0fefdbd3d83f6de
>>> I don't know enough of apache2's codebase to see if it may has an
>>> impact, but it seems unlikely to me; as the AP_BUCKET_IS_EOR()
>>> check seems related to a newly introduced member
>>>
>>> Regards,
>>>
>>> -- Package-specific info:
>>>
>>> -- System Information:
>>> Debian Release: 11.7
>>> APT prefers stable-updates
>>> APT policy: (500, 'stable-updates'), (500, 'stable-security'),
>>> (500, 'stable-debug'), (500, 'oldstable-updates'), (500,
>>> 'oldstable-debug'), (500, 'stable'), (500, 'oldstable')
>>> Architecture: amd64 (x86_64)
>>>
>>> Kernel: Linux 5.10.0-23-amd64 (SMP w/8 CPU threads)
>>> Locale: LANG=fr_FR.UTF-8, LC_CTYPE=fr_FR.UTF-8 (charmap=UTF-8),
>>> LANGUAGE not set
>>> Shell: /bin/sh linked to /bin/dash
>>> Init: systemd (via /run/systemd/system)
>>>
>>> Versions of packages apache2 depends on:
>>> ii apache2-bin 2.4.56-1~deb11u2
>>> ii apache2-data 2.4.56-1~deb11u2
>>> ii apache2-utils 2.4.56-1~deb11u2
>>> ii dpkg 1.20.12
>>> ii init-system-helpers 1.60
>>> ii lsb-base 11.1.0
>>> ii mime-support 3.66
>>> ii perl 5.32.1-4+deb11u2
>>> ii procps 2:3.3.17-5
>>>
>>> Versions of packages apache2 recommends:
>>> ii ssl-cert 1.1.0+nmu1
>>>
>>> Versions of packages apache2 suggests:
>>> pn apache2-doc <none>
>>> ii apache2-suexec-pristine 2.4.56-1~deb11u2
>>> ii chimera2 [www-browser] 2.0a19-8+b2
>>> ii dillo [www-browser] 3.0.5-7
>>> ii links2 [www-browser] 2.21-1+b1
>>> ii lynx [www-browser] 2.9.0dev.6-3~deb11u1
>>>
>>> Versions of packages apache2-bin depends on:
>>> ii libapr1 1.7.0-6+deb11u2
>>> ii libaprutil1 1.6.1-5+deb11u1
>>> ii libaprutil1-dbd-sqlite3 1.6.1-5+deb11u1
>>> ii libaprutil1-ldap 1.6.1-5+deb11u1
>>> ii libbrotli1 1.0.9-2+b2
>>> ii libc6 2.31-13+deb11u6
>>> ii libcrypt1 1:4.4.18-4
>>> ii libcurl4 7.74.0-1.3+deb11u7
>>> ii libjansson4 2.13.1-1.1
>>> ii libldap-2.4-2 2.4.57+dfsg-3+deb11u1
>>> ii liblua5.3-0 5.3.3-1.1+b1
>>> ii libnghttp2-14 1.43.0-1
>>> ii libpcre3 2:8.44-
>>> 2+0~20210301.9+debian11~1.gbpa278ad
>>> ii libssl1.1 1.1.1n-0+deb11u4
>>> ii libxml2 2.9.14+dfsg-
>>> 0.1+0~20230421.14+debian11~1.gbpf14485
>>> ii perl 5.32.1-4+deb11u2
>>> ii zlib1g 1:1.2.11.dfsg-2+deb11u2
>>>
>>> Versions of packages apache2-bin suggests:
>>> pn apache2-doc <none>
>>> ii apache2-suexec-pristine 2.4.56-1~deb11u2
>>> ii chimera2 [www-browser] 2.0a19-8+b2
>>> ii dillo [www-browser] 3.0.5-7
>>> ii links2 [www-browser] 2.21-1+b1
>>> ii lynx [www-browser] 2.9.0dev.6-3~deb11u1
>>>
>>> Versions of packages apache2 is related to:
>>> ii apache2 2.4.56-1~deb11u2
>>> ii apache2-bin 2.4.56-1~deb11u2
>>>
>>> -- Configuration Files:
>>> /etc/apache2/apache2.conf changed [not included]
>>> /etc/apache2/conf-available/security.conf changed [not included]
>>> /etc/apache2/envvars changed [not included]
>>> /etc/apache2/mods-available/cern_meta.load [Errno 2] Aucun fichier
>>> ou dossier de ce type: '/etc/apache2/mods-available/cern_meta.load'
>>> /etc/apache2/mods-available/ident.load [Errno 2] Aucun fichier ou
>>> dossier de ce type: '/etc/apache2/mods-available/ident.load'
>>> /etc/apache2/mods-available/imagemap.load [Errno 2] Aucun fichier
>>> ou dossier de ce type: '/etc/apache2/mods-available/imagemap.load'
>>> /etc/apache2/mods-available/proxy_hcheck.load [Errno 2] Aucun
>>> fichier ou dossier de ce type: '/etc/apache2/mods-
>>> available/proxy_hcheck.load'
>>> /etc/apache2/mods-available/proxy_html.load changed [not included]
>>> /etc/apache2/mods-available/proxy_http2.load changed [not included]
>>> /etc/apache2/mods-available/userdir.conf changed [not included]
>>> /etc/apache2/ports.conf changed [not included]
>>> /etc/apache2/sites-available/000-default.conf changed [not
>>> included]
>>> /etc/apache2/sites-available/default-ssl.conf changed [not
>>> included]
>>> /etc/cron.daily/apache2 changed [not included]
>>> /etc/default/apache-htcacheclean [Errno 2] Aucun fichier ou dossier
>>> de ce type: '/etc/default/apache-htcacheclean'
>>> /etc/init.d/apache2 changed [not included]
>>> /etc/logrotate.d/apache2 changed [not included]
>>>
>>> -- no debconf information
>>>
>>
>
> --
> Bastien
>
Reply to: