Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/crypto/openssl/ssl/quic/quic_ackm.c
108604 views
1
/*
2
* Copyright 2022-2025 The OpenSSL Project Authors. All Rights Reserved.
3
*
4
* Licensed under the Apache License 2.0 (the "License"). You may not use
5
* this file except in compliance with the License. You can obtain a copy
6
* in the file LICENSE in the source distribution or at
7
* https://www.openssl.org/source/license.html
8
*/
9
10
#include "internal/quic_ackm.h"
11
#include "internal/uint_set.h"
12
#include "internal/common.h"
13
#include <assert.h>
14
15
DEFINE_LIST_OF(tx_history, OSSL_ACKM_TX_PKT); /* presumably generates the ossl_list_tx_history_* helpers used below */
16
17
/*
18
* TX Packet History
19
* *****************
20
*
21
* The TX Packet History object tracks information about packets which have been
22
* sent for which we later expect to receive an ACK. It is essentially a simple
23
* database keeping a list of packet information structures in packet number
24
* order which can also be looked up directly by packet number.
25
*
26
* We currently only allow packets to be appended to the list (i.e. the packet
27
* numbers of the packets appended to the list must monotonically increase), as
28
* we should not currently need more general functionality such as a sorted list
29
* insert.
30
*/
31
struct tx_pkt_history_st {
32
/* A linked list of all our packets. */
33
OSSL_LIST(tx_history)
34
packets;
35
36
/*
37
* Mapping from packet numbers (uint64_t) to (OSSL_ACKM_TX_PKT *)
38
*
39
* Invariant: A packet is in this map if and only if it is in the linked
40
* list.
41
*/
42
LHASH_OF(OSSL_ACKM_TX_PKT) *map;
43
44
/*
45
* The lowest packet number which may currently be added to the history list
46
* (inclusive). We do not allow packet numbers to be added to the history
47
* list non-monotonically, so packet numbers must be greater than or equal
48
* to this value.
49
*/
50
uint64_t watermark;
51
52
/*
53
* Packet number of the highest packet info structure we have yet appended
54
* to the list. This is usually one less than watermark, except when we have
55
* not added any packet yet.
56
*/
57
uint64_t highest_sent;
58
};
59
60
DEFINE_LHASH_OF_EX(OSSL_ACKM_TX_PKT); /* presumably generates the lh_OSSL_ACKM_TX_PKT_* helpers used below */
61
62
static unsigned long tx_pkt_info_hash(const OSSL_ACKM_TX_PKT *pkt)
63
{
64
/* Using low bits of the packet number as the hash should be enough */
65
return (unsigned long)pkt->pkt_num;
66
}
67
68
static int tx_pkt_info_compare(const OSSL_ACKM_TX_PKT *a,
69
const OSSL_ACKM_TX_PKT *b)
70
{
71
if (a->pkt_num < b->pkt_num)
72
return -1;
73
if (a->pkt_num > b->pkt_num)
74
return 1;
75
return 0;
76
}
77
78
static int
79
tx_pkt_history_init(struct tx_pkt_history_st *h)
80
{
81
ossl_list_tx_history_init(&h->packets);
82
h->watermark = 0;
83
h->highest_sent = 0;
84
85
h->map = lh_OSSL_ACKM_TX_PKT_new(tx_pkt_info_hash, tx_pkt_info_compare);
86
if (h->map == NULL)
87
return 0;
88
89
return 1;
90
}
91
92
static void
93
tx_pkt_history_destroy(struct tx_pkt_history_st *h)
94
{
95
lh_OSSL_ACKM_TX_PKT_free(h->map);
96
h->map = NULL;
97
ossl_list_tx_history_init(&h->packets);
98
}
99
100
static int
101
tx_pkt_history_add_actual(struct tx_pkt_history_st *h,
102
OSSL_ACKM_TX_PKT *pkt)
103
{
104
OSSL_ACKM_TX_PKT *existing;
105
106
/*
107
* There should not be any existing packet with this number
108
* in our mapping.
109
*/
110
existing = lh_OSSL_ACKM_TX_PKT_retrieve(h->map, pkt);
111
if (!ossl_assert(existing == NULL))
112
return 0;
113
114
/* Should not already be in a list. */
115
if (!ossl_assert(ossl_list_tx_history_next(pkt) == NULL
116
&& ossl_list_tx_history_prev(pkt) == NULL))
117
return 0;
118
119
lh_OSSL_ACKM_TX_PKT_insert(h->map, pkt);
120
121
ossl_list_tx_history_insert_tail(&h->packets, pkt);
122
return 1;
123
}
124
125
/* Adds a packet information structure to the history list. */
126
static int
127
tx_pkt_history_add(struct tx_pkt_history_st *h,
128
OSSL_ACKM_TX_PKT *pkt)
129
{
130
if (!ossl_assert(pkt->pkt_num >= h->watermark))
131
return 0;
132
133
if (tx_pkt_history_add_actual(h, pkt) < 1)
134
return 0;
135
136
h->watermark = pkt->pkt_num + 1;
137
h->highest_sent = pkt->pkt_num;
138
return 1;
139
}
140
141
/* Retrieve a packet information structure by packet number. */
142
static OSSL_ACKM_TX_PKT *
143
tx_pkt_history_by_pkt_num(struct tx_pkt_history_st *h, uint64_t pkt_num)
144
{
145
OSSL_ACKM_TX_PKT key;
146
147
key.pkt_num = pkt_num;
148
149
return lh_OSSL_ACKM_TX_PKT_retrieve(h->map, &key);
150
}
151
152
/* Remove a packet information structure from the history log. */
153
static int
154
tx_pkt_history_remove(struct tx_pkt_history_st *h, uint64_t pkt_num)
155
{
156
OSSL_ACKM_TX_PKT key, *pkt;
157
key.pkt_num = pkt_num;
158
159
pkt = tx_pkt_history_by_pkt_num(h, pkt_num);
160
if (pkt == NULL)
161
return 0;
162
163
ossl_list_tx_history_remove(&h->packets, pkt);
164
lh_OSSL_ACKM_TX_PKT_delete(h->map, &key);
165
return 1;
166
}
167
168
/*
169
* RX Packet Number Tracking
170
* *************************
171
*
172
* **Background.** The RX side of the ACK manager must track packets we have
173
* received for which we have to generate ACK frames. Broadly, this means we
174
* store a set of packet numbers which we have received but which we do not know
175
* for a fact that the transmitter knows we have received.
176
*
177
* This must handle various situations:
178
*
179
* 1. We receive a packet but have not sent an ACK yet, so the transmitter
180
* does not know whether we have received it or not yet.
181
*
182
* 2. We receive a packet and send an ACK which is lost. We do not
183
* immediately know that the ACK was lost and the transmitter does not know
184
* that we have received the packet.
185
*
186
* 3. We receive a packet and send an ACK which is received by the
187
* transmitter. The transmitter does not immediately respond with an ACK,
188
* or responds with an ACK which is lost. The transmitter knows that we
189
* have received the packet, but we do not know for sure that it knows,
190
* because the ACK we sent could have been lost.
191
*
192
* 4. We receive a packet and send an ACK which is received by the
193
* transmitter. The transmitter subsequently sends us an ACK which confirms
194
* its receipt of the ACK we sent, and we successfully receive that ACK, so
195
* we know that the transmitter knows, that we received the original
196
* packet.
197
*
198
* Only when we reach case (4) are we relieved of any need to track a given
199
* packet number we have received, because only in this case do we know for sure
200
* that the peer knows we have received the packet. Having reached case (4) we
201
* will never again need to generate an ACK containing the PN in question, but
202
* until we reach that point, we must keep track of the PN as not having been
203
* provably ACKed, as we may have to keep generating ACKs for the given PN not
204
* just until the transmitter receives one, but until we know that it has
205
* received one. This will be referred to herein as "provably ACKed".
206
*
207
* **Duplicate handling.** The above discusses the case where we have received a
208
* packet with a given PN but are at best unsure whether the sender knows we
209
* have received it or not. However, we must also handle the case where we have
210
* yet to receive a packet with a given PN in the first place. The reason for
211
* this is because of the requirement expressed by RFC 9000 s. 12.3:
212
*
213
* "A receiver MUST discard a newly unprotected packet unless it is certain
214
* that it has not processed another packet with the same packet number from
215
* the same packet number space."
216
*
217
* We must ensure we never process a duplicate PN. As such, each possible PN we
218
* can receive must exist in one of the following logical states:
219
*
220
* - We have never processed this PN before
221
* (so if we receive such a PN, it can be processed)
222
*
223
* - We have processed this PN but it has not yet been provably ACKed
224
* (and should therefore be in any future ACK frame generated;
225
* if we receive such a PN again, it must be ignored)
226
*
227
* - We have processed this PN and it has been provably ACKed
228
* (if we receive such a PN again, it must be ignored)
229
*
230
* However, if we were to track this state for every PN ever used in the history
231
* of a connection, the amount of state required would increase unboundedly as
232
* the connection goes on (for example, we would have to store a set of every PN
233
* ever received.)
234
*
235
* RFC 9000 s. 12.3 continues:
236
*
237
* "Endpoints that track all individual packets for the purposes of detecting
238
* duplicates are at risk of accumulating excessive state. The data required
239
* for detecting duplicates can be limited by maintaining a minimum packet
240
* number below which all packets are immediately dropped."
241
*
242
* Moreover, RFC 9000 s. 13.2.3 states that:
243
*
244
* "A receiver MUST retain an ACK Range unless it can ensure that it will not
245
* subsequently accept packets with numbers in that range. Maintaining a
246
* minimum packet number that increases as ranges are discarded is one way to
247
* achieve this with minimal state."
248
*
249
* This touches on a subtlety of the original requirement quoted above: the
250
* receiver MUST discard a packet unless it is certain that it has not processed
251
* another packet with the same PN. However, this does not forbid the receiver
252
* from also discarding some PNs even though it has not yet processed them. In
253
* other words, implementations must be conservative and err in the direction of
254
* assuming a packet is a duplicate, but it is acceptable for this to come at
255
* the cost of falsely identifying some packets as duplicates.
256
*
257
* This allows us to bound the amount of state we must keep, and we adopt the
258
* suggested strategy quoted above to do so. We define a watermark PN below
259
* which all PNs are in the same state. This watermark is only ever increased.
260
* Thus the PNs the state for which needs to be explicitly tracked is limited to
261
* only a small number of recent PNs, and all older PNs have an assumed state.
262
*
263
* Any given PN thus falls into one of the following states:
264
*
265
* - (A) The PN is above the watermark but we have not yet received it.
266
*
267
* If we receive such a PN, we should process it and record the PN as
268
* received.
269
*
270
* - (B) The PN is above the watermark and we have received it.
271
*
272
* The PN should be included in any future ACK frame we generate.
273
* If we receive such a PN again, we should ignore it.
274
*
275
* - (C) The PN is below the watermark.
276
*
277
* We do not know whether a packet with the given PN was received or
278
* not. To be safe, if we receive such a packet, it is not processed.
279
*
280
* Note that state (C) corresponds to both "we have processed this PN and it has
281
* been provably ACKed" logical state and a subset of the PNs in the "we have
282
* never processed this PN before" logical state (namely all PNs which were lost
283
* and never received, but which are not recent enough to be above the
284
* watermark). The reason we can merge these states and avoid tracking states
285
* for the PNs in this state is because the provably ACKed and never-received
286
* states are functionally identical in terms of how we need to handle them: we
287
* don't need to do anything for PNs in either of these states, so we don't have
288
* to care about PNs in this state nor do we have to care about distinguishing
289
* the two states for a given PN.
290
*
291
* Note that under this scheme provably ACKed PNs are by definition always below
292
* the watermark; therefore, it follows that when a PN becomes provably ACKed,
293
* the watermark must be immediately increased to exceed it (otherwise we would
294
* keep reporting it in future ACK frames).
295
*
296
* This is in line with RFC 9000 s. 13.2.4's suggested strategy on when
297
* to advance the watermark:
298
*
299
* "When a packet containing an ACK frame is sent, the Largest Acknowledged
300
* field in that frame can be saved. When a packet containing an ACK frame is
301
* acknowledged, the receiver can stop acknowledging packets less than or
302
* equal to the Largest Acknowledged field in the sent ACK frame."
303
*
304
* This is where our scheme's false positives arise. When a packet containing an
305
* ACK frame is itself ACK'd, PNs referenced in that ACK frame become provably
306
* acked, and the watermark is bumped accordingly. However, the Largest
307
* Acknowledged field does not imply that all lower PNs have been received,
308
* because there may be gaps expressed in the ranges of PNs expressed by that
309
* and previous ACK frames. Thus, some unreceived PNs may be moved below the
310
* watermark, and we may subsequently reject those PNs as possibly being
311
* duplicates even though we have not actually received those PNs. Since we bump
312
* the watermark when a PN becomes provably ACKed, it follows that an unreceived
313
* PN falls below the watermark (and thus becomes a false positive for the
314
* purposes of duplicate detection) when a higher-numbered PN becomes provably
315
* ACKed.
316
*
317
* Thus, when PN n becomes provably acked, any unreceived PNs in the range [0,
318
* n) will no longer be processed. Although datagrams may be reordered in the
319
* network, a PN we receive can only become provably ACKed after our own
320
* subsequently generated ACK frame is sent in a future TX packet, and then we
321
* receive another RX PN acknowledging that TX packet. This means that a given RX
322
* PN can only become provably ACKed at least 1 RTT after it is received; it is
323
* unlikely that any reordered datagrams will still be "in the network" (and not
324
* lost) by this time. If this does occur for whatever reason and a late PN is
325
* received, the packet will be discarded unprocessed and the PN is simply
326
* handled as though lost (a "written off" PN).
327
*
328
* **Data structure.** Our state for the RX handling side of the ACK manager, as
329
* discussed above, mainly comprises:
330
*
331
* a) a logical set of PNs, and
332
* b) a monotonically increasing PN counter (the watermark).
333
*
334
* For (a), we define a data structure which stores a logical set of PNs, which
335
* we use to keep track of which PNs we have received but which have not yet
336
* been provably ACKed, and thus will later need to generate an ACK frame for.
337
*
338
* The correspondence with the logical states discussed above is as follows. A
339
* PN is in state (C) if it is below the watermark; otherwise it is in state (B)
340
* if it is in the logical set of PNs, and in state (A) otherwise.
341
*
342
* Note that PNs are only removed from the PN set (when they become provably
343
* ACKed or written off) by virtue of advancement of the watermark. Removing PNs
344
* from the PN set any other way would be ambiguous as it would be
345
* indistinguishable from a PN we have not yet received and risk us processing a
346
* duplicate packet. In other words, for a given PN:
347
*
348
* - State (A) can transition to state (B) or (C)
349
* - State (B) can transition to state (C) only
350
* - State (C) is the terminal state
351
*
352
* We can query the logical set data structure for PNs which have been received
353
* but which have not been provably ACKed when we want to generate ACK frames.
354
* Since ACK frames can be lost and/or we might not know that the peer has
355
* successfully received them, we might generate multiple ACK frames covering a
356
* given PN until that PN becomes provably ACKed and we finally remove it from
357
* our set (by bumping the watermark) as no longer being our concern.
358
*
359
* The data structure used is the UINT_SET structure defined in uint_set.h,
360
* which is used as a PN set. We use the following operations of the structure:
361
*
362
* Insert Range: Used when we receive a new PN.
363
*
364
* Remove Range: Used when bumping the watermark.
365
*
366
* Query: Used to determine if a PN is in the set.
367
*
368
* **Possible duplicates.** A PN is considered a possible duplicate when either:
369
*
370
* a) its PN is already in the PN set (i.e. has already been received), or
371
* b) its PN is below the watermark (i.e. was provably ACKed or written off).
372
*
373
* A packet with a given PN is considered 'processable' when that PN is not
374
* considered a possible duplicate (see ossl_ackm_is_rx_pn_processable).
375
*
376
* **TX/RX interaction.** The watermark is bumped whenever an RX packet becomes
377
* provably ACKed. This occurs when an ACK frame is received by the TX side of
378
* the ACK manager; thus, there is necessary interaction between the TX and RX
379
* sides of the ACK manager.
380
*
381
* This is implemented as follows. When a packet is queued as sent in the TX
382
* side of the ACK manager, it may optionally have a Largest Acked value set on
383
* it. The user of the ACK manager should do this if the packet being
384
* transmitted contains an ACK frame, by setting the field to the Largest Acked
385
* field of that frame. Otherwise, this field should be set to QUIC_PN_INVALID.
386
* When a TX packet is eventually acknowledged which has this field set, it is
387
* used to update the state of the RX side of the ACK manager by bumping the
388
* watermark accordingly.
389
*/
390
struct rx_pkt_history_st {
391
UINT_SET set;
392
393
/*
394
* Invariant: PNs below this are not in the set.
395
* Invariant: This is monotonic and only ever increases.
396
*/
397
QUIC_PN watermark;
398
};
399
400
static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h,
401
QUIC_PN watermark);
402
403
static void rx_pkt_history_init(struct rx_pkt_history_st *h)
404
{
405
ossl_uint_set_init(&h->set);
406
h->watermark = 0;
407
}
408
409
static void rx_pkt_history_destroy(struct rx_pkt_history_st *h)
410
{
411
ossl_uint_set_destroy(&h->set);
412
}
413
414
/*
415
* Limit the number of ACK ranges we store to prevent resource consumption DoS
416
* attacks.
417
*/
418
#define MAX_RX_ACK_RANGES 32 /* also the per-space size of ack_ranges[] in struct ossl_ackm_st */
419
420
static void rx_pkt_history_trim_range_count(struct rx_pkt_history_st *h)
421
{
422
QUIC_PN highest = QUIC_PN_INVALID;
423
424
while (ossl_list_uint_set_num(&h->set) > MAX_RX_ACK_RANGES) {
425
UINT_RANGE r = ossl_list_uint_set_head(&h->set)->range;
426
427
highest = (highest == QUIC_PN_INVALID)
428
? r.end
429
: ossl_quic_pn_max(highest, r.end);
430
431
ossl_uint_set_remove(&h->set, &r);
432
}
433
434
/*
435
* Bump watermark to cover all PNs we removed to avoid accidental
436
* reprocessing of packets.
437
*/
438
if (highest != QUIC_PN_INVALID)
439
rx_pkt_history_bump_watermark(h, highest + 1);
440
}
441
442
static int rx_pkt_history_add_pn(struct rx_pkt_history_st *h,
443
QUIC_PN pn)
444
{
445
UINT_RANGE r;
446
447
r.start = pn;
448
r.end = pn;
449
450
if (pn < h->watermark)
451
return 1; /* consider this a success case */
452
453
if (ossl_uint_set_insert(&h->set, &r) != 1)
454
return 0;
455
456
rx_pkt_history_trim_range_count(h);
457
return 1;
458
}
459
460
static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h,
461
QUIC_PN watermark)
462
{
463
UINT_RANGE r;
464
465
if (watermark <= h->watermark)
466
return 1;
467
468
/* Remove existing PNs below the watermark. */
469
r.start = 0;
470
r.end = watermark - 1;
471
if (ossl_uint_set_remove(&h->set, &r) != 1)
472
return 0;
473
474
h->watermark = watermark;
475
return 1;
476
}
477
478
/*
479
* ACK Manager Implementation
480
* **************************
481
* Implementation of the ACK manager proper.
482
*/
483
484
/* Constants used by the ACK manager; see RFC 9002. */
485
#define K_GRANULARITY (1 * OSSL_TIME_MS) /* floor for the loss delay and the PTO variance term */
#define K_PKT_THRESHOLD 3 /* PN distance behind the largest acked PN at which a packet is deemed lost */
#define K_TIME_THRESHOLD_NUM 9 /* loss delay = max(latest RTT, smoothed RTT) * NUM / DEN */
#define K_TIME_THRESHOLD_DEN 8

/* Upper limit on how many times the PTO may be doubled. */
#define MAX_PTO_COUNT 16

/* Default longest time an ACK-eliciting packet may be left un-ACK'd. */
#define DEFAULT_TX_MAX_ACK_DELAY ossl_ms2time(QUIC_DEFAULT_MAX_ACK_DELAY)
495
496
struct ossl_ackm_st {
497
/* Our list of transmitted packets. Corresponds to RFC 9002 sent_packets. */
498
struct tx_pkt_history_st tx_history[QUIC_PN_SPACE_NUM];
499
500
/* Our list of received PNs which are not yet provably acked. */
501
struct rx_pkt_history_st rx_history[QUIC_PN_SPACE_NUM];
502
503
/* Polymorphic dependencies that we consume. */
504
OSSL_TIME (*now)(void *arg);
505
void *now_arg;
506
OSSL_STATM *statm;
507
const OSSL_CC_METHOD *cc_method;
508
OSSL_CC_DATA *cc_data;
509
510
/* RFC 9002 variables. */
511
uint32_t pto_count;
512
QUIC_PN largest_acked_pkt[QUIC_PN_SPACE_NUM];
513
OSSL_TIME time_of_last_ack_eliciting_pkt[QUIC_PN_SPACE_NUM];
514
OSSL_TIME loss_time[QUIC_PN_SPACE_NUM];
515
OSSL_TIME loss_detection_deadline;
516
517
/* Lowest PN which is still not known to be ACKed. */
518
QUIC_PN lowest_unacked_pkt[QUIC_PN_SPACE_NUM];
519
520
/* Time at which we got our first RTT sample, or 0. */
521
OSSL_TIME first_rtt_sample;
522
523
/*
524
* A packet's num_bytes are added to this if it is inflight,
525
* and removed again once ack'd/lost/discarded.
526
*/
527
uint64_t bytes_in_flight;
528
529
/*
530
* A packet's num_bytes are added to this if it is both inflight and
531
* ack-eliciting, and removed again once ack'd/lost/discarded.
532
*/
533
uint64_t ack_eliciting_bytes_in_flight[QUIC_PN_SPACE_NUM];
534
535
/* Count of ECN-CE events. */
536
uint64_t peer_ecnce[QUIC_PN_SPACE_NUM];
537
538
/* Set to 1 when the handshake is confirmed. */
539
char handshake_confirmed;
540
541
/* Set to 1 when attached to server channel */
542
char is_server;
543
544
/* Set to 1 when the peer has completed address validation. */
545
char peer_completed_addr_validation;
546
547
/* Set to 1 when a PN space has been discarded. */
548
char discarded[QUIC_PN_SPACE_NUM];
549
550
/* Set to 1 when we think an ACK frame should be generated. */
551
char rx_ack_desired[QUIC_PN_SPACE_NUM];
552
553
/* Set to 1 if an ACK frame has ever been generated. */
554
char rx_ack_generated[QUIC_PN_SPACE_NUM];
555
556
/* Probe request counts for reporting to the user. */
557
OSSL_ACKM_PROBE_INFO pending_probe;
558
559
/* Generated ACK frames for each PN space. */
560
OSSL_QUIC_FRAME_ACK ack[QUIC_PN_SPACE_NUM];
561
OSSL_QUIC_ACK_RANGE ack_ranges[QUIC_PN_SPACE_NUM][MAX_RX_ACK_RANGES];
562
563
/* Other RX state. */
564
/* Largest PN we have RX'd. */
565
QUIC_PN rx_largest_pn[QUIC_PN_SPACE_NUM];
566
567
/* Time at which the PN in rx_largest_pn was RX'd. */
568
OSSL_TIME rx_largest_time[QUIC_PN_SPACE_NUM];
569
570
/*
571
* ECN event counters. Each time we receive a packet with a given ECN label,
572
* the corresponding ECN counter here is incremented.
573
*/
574
uint64_t rx_ect0[QUIC_PN_SPACE_NUM];
575
uint64_t rx_ect1[QUIC_PN_SPACE_NUM];
576
uint64_t rx_ecnce[QUIC_PN_SPACE_NUM];
577
578
/*
579
* Number of ACK-eliciting packets since last ACK. We use this to defer
580
* emitting ACK frames until a threshold number of ACK-eliciting packets
581
* have been received.
582
*/
583
uint32_t rx_ack_eliciting_pkts_since_last_ack[QUIC_PN_SPACE_NUM];
584
585
/*
586
* The ACK frame coalescing deadline at which we should flush any unsent ACK
587
* frames.
588
*/
589
OSSL_TIME rx_ack_flush_deadline[QUIC_PN_SPACE_NUM];
590
591
/*
592
* The RX maximum ACK delay (the maximum amount of time our peer might
593
* wait to send us an ACK after receiving an ACK-eliciting packet).
594
*/
595
OSSL_TIME rx_max_ack_delay;
596
597
/*
598
* The TX maximum ACK delay (the maximum amount of time we allow ourselves
599
* to wait before generating an ACK after receiving an ACK-eliciting
600
* packet).
601
*/
602
OSSL_TIME tx_max_ack_delay;
603
604
/* Callbacks for deadline updates. */
605
void (*loss_detection_deadline_cb)(OSSL_TIME deadline, void *arg);
606
void *loss_detection_deadline_cb_arg;
607
608
void (*ack_deadline_cb)(OSSL_TIME deadline, int pkt_space, void *arg);
609
void *ack_deadline_cb_arg;
610
};
611
612
/* Returns the smaller of two 32-bit unsigned values. */
static ossl_inline uint32_t min_u32(uint32_t x, uint32_t y)
{
    if (x < y)
        return x;

    return y;
}
616
617
/*
618
* Get TX history for a given packet number space. Must not have been
619
* discarded.
620
*/
621
static struct tx_pkt_history_st *get_tx_history(OSSL_ACKM *ackm, int pkt_space)
{
    struct tx_pkt_history_st *hist = &ackm->tx_history[pkt_space];

    /* Callers must never ask for the history of a discarded PN space. */
    assert(!ackm->discarded[pkt_space]);

    return hist;
}
627
628
/*
629
* Get RX history for a given packet number space. Must not have been
630
* discarded.
631
*/
632
static struct rx_pkt_history_st *get_rx_history(OSSL_ACKM *ackm, int pkt_space)
{
    struct rx_pkt_history_st *hist = &ackm->rx_history[pkt_space];

    /* Callers must never ask for the history of a discarded PN space. */
    assert(!ackm->discarded[pkt_space]);

    return hist;
}
638
639
/* Does the newly-acknowledged list contain any ack-eliciting packet? */
640
static int ack_includes_ack_eliciting(OSSL_ACKM_TX_PKT *pkt)
{
    OSSL_ACKM_TX_PKT *cur = pkt;

    /* Follow the anext chain until an ack-eliciting packet turns up. */
    while (cur != NULL) {
        if (cur->is_ack_eliciting)
            return 1;

        cur = cur->anext;
    }

    return 0;
}
648
649
/* Return number of ACK-eliciting bytes in flight across all PN spaces. */
650
static uint64_t ackm_ack_eliciting_bytes_in_flight(OSSL_ACKM *ackm)
{
    uint64_t sum = 0;
    int space = 0;

    /* Add up the per-space counters. */
    while (space < QUIC_PN_SPACE_NUM) {
        sum += ackm->ack_eliciting_bytes_in_flight[space];
        ++space;
    }

    return sum;
}
660
661
/* Return 1 if the range contains the given PN. */
662
static int range_contains(const OSSL_QUIC_ACK_RANGE *range, QUIC_PN pn)
{
    /* Both ends of the range are inclusive. */
    return !(pn < range->start || pn > range->end);
}
666
667
/*
668
* Given a logical representation of an ACK frame 'ack', create a singly-linked
669
* list of the newly ACK'd frames; that is, of frames which are matched by the
670
* list of PN ranges contained in the ACK frame. The packet structures in the
671
* list returned are removed from the TX history list. Returns a pointer to the
672
 * list head, or NULL if the list is empty.
673
*/
674
static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_newly_acked_pkts(OSSL_ACKM *ackm,
675
const OSSL_QUIC_FRAME_ACK *ack,
676
int pkt_space)
677
{
678
OSSL_ACKM_TX_PKT *acked_pkts = NULL, **fixup = &acked_pkts, *pkt, *pprev;
679
struct tx_pkt_history_st *h;
680
size_t ridx = 0;
681
682
assert(ack->num_ack_ranges > 0);
683
684
/*
685
* Our history list is a list of packets sorted in ascending order
686
* by packet number.
687
*
688
* ack->ack_ranges is a list of packet number ranges in descending order.
689
*
690
* Walk through our history list from the end in order to efficiently detect
691
* membership in the specified ack ranges. As an optimization, we use our
692
* hashtable to try and skip to the first matching packet. This may fail if
693
* the ACK ranges given include nonexistent packets.
694
*/
695
h = get_tx_history(ackm, pkt_space);
696
697
pkt = tx_pkt_history_by_pkt_num(h, ack->ack_ranges[0].end);
698
if (pkt == NULL)
699
pkt = ossl_list_tx_history_tail(&h->packets);
700
701
for (; pkt != NULL; pkt = pprev) {
702
/*
703
* Save prev value as it will be zeroed if we remove the packet from the
704
* history list below.
705
*/
706
pprev = ossl_list_tx_history_prev(pkt);
707
708
for (;; ++ridx) {
709
if (ridx >= ack->num_ack_ranges) {
710
/*
711
* We have exhausted all ranges so stop here, even if there are
712
* more packets to look at.
713
*/
714
goto stop;
715
}
716
717
if (range_contains(&ack->ack_ranges[ridx], pkt->pkt_num)) {
718
/* We have matched this range. */
719
tx_pkt_history_remove(h, pkt->pkt_num);
720
721
*fixup = pkt;
722
fixup = &pkt->anext;
723
*fixup = NULL;
724
break;
725
} else if (pkt->pkt_num > ack->ack_ranges[ridx].end) {
726
/*
727
* We have not reached this range yet in our list, so do not
728
* advance ridx.
729
*/
730
break;
731
} else {
732
/*
733
* We have moved beyond this range, so advance to the next range
734
* and try matching again.
735
*/
736
assert(pkt->pkt_num < ack->ack_ranges[ridx].start);
737
continue;
738
}
739
}
740
}
741
stop:
742
743
return acked_pkts;
744
}
745
746
/*
747
* Create a singly-linked list of newly detected-lost packets in the given
748
* packet number space. Returns the head of the list or NULL if no packets were
749
* detected lost. The packets in the list are removed from the TX history list.
750
*/
751
static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_lost_pkts(OSSL_ACKM *ackm,
                                                          int pkt_space)
{
    OSSL_ACKM_TX_PKT *head = NULL, **tailp = &head, *cur, *following;
    OSSL_TIME threshold, cutoff, candidate;
    OSSL_RTT_INFO rtt;
    struct tx_pkt_history_st *hist;

    assert(ackm->largest_acked_pkt[pkt_space] != QUIC_PN_INVALID);

    ossl_statm_get_rtt_info(ackm->statm, &rtt);

    /* Recomputed from scratch below from the packets that remain. */
    ackm->loss_time[pkt_space] = ossl_time_zero();

    /*
     * Loss delay: the larger of the latest and smoothed RTT, scaled by the
     * time threshold fraction, and never less than K_GRANULARITY.
     */
    threshold = ossl_time_max(rtt.latest_rtt, rtt.smoothed_rtt);
    threshold = ossl_time_multiply(threshold, K_TIME_THRESHOLD_NUM);
    threshold = ossl_time_divide(threshold, K_TIME_THRESHOLD_DEN);
    threshold = ossl_time_max(threshold, ossl_ticks2time(K_GRANULARITY));

    /* Any packet sent at or before this instant is deemed lost. */
    cutoff = ossl_time_subtract(ackm->now(ackm->now_arg), threshold);

    hist = get_tx_history(ackm, pkt_space);

    for (cur = ossl_list_tx_history_head(&hist->packets);
         cur != NULL;
         cur = following) {
        assert(pkt_space == cur->pkt_space);

        /*
         * Fetch the successor now; removing the packet from the history list
         * below clears its links.
         */
        following = ossl_list_tx_history_next(cur);

        /* Packets newer than the largest acked PN are not yet candidates. */
        if (cur->pkt_num > ackm->largest_acked_pkt[pkt_space])
            continue;

        if (ossl_time_compare(cur->time, cutoff) <= 0
            || ackm->largest_acked_pkt[pkt_space]
                >= cur->pkt_num + K_PKT_THRESHOLD) {
            /* Lost: unlink from history and append to the result list. */
            tx_pkt_history_remove(hist, cur->pkt_num);

            *tailp = cur;
            tailp = &cur->lnext;
            *tailp = NULL;
            continue;
        }

        /*
         * Not lost yet: note when it would become so, keeping the earliest
         * such time across all surviving packets.
         */
        candidate = ossl_time_add(cur->time, threshold);
        if (!ossl_time_is_zero(ackm->loss_time[pkt_space]))
            candidate = ossl_time_min(ackm->loss_time[pkt_space], candidate);

        ackm->loss_time[pkt_space] = candidate;
    }

    return head;
}
814
815
/*
 * Selects a loss time across the PN spaces, following the selection rule of
 * RFC 9002's GetLossTimeAndSpace. The chosen PN space is written to *pspace
 * and its loss time is returned.
 */
static OSSL_TIME ackm_get_loss_time_and_space(OSSL_ACKM *ackm, int *pspace)
{
    int chosen = QUIC_PN_SPACE_INITIAL, s;
    OSSL_TIME chosen_time = ackm->loss_time[QUIC_PN_SPACE_INITIAL];

    for (s = QUIC_PN_SPACE_INITIAL + 1; s < QUIC_PN_SPACE_NUM; ++s) {
        /*
         * Keep the current choice only if it is non-zero and this space's
         * loss time is not strictly earlier than it.
         */
        if (!ossl_time_is_zero(chosen_time)
            && ossl_time_compare(ackm->loss_time[s], chosen_time) != -1)
            continue;

        chosen_time = ackm->loss_time[s];
        chosen = s;
    }

    *pspace = chosen;
    return chosen_time;
}
830
831
/*
 * Compute the probe timeout (PTO) deadline and the PN space it applies to.
 *
 * The base PTO duration is smoothed_rtt + max(4 * rtt_variance, K_GRANULARITY),
 * scaled by 2^min(pto_count, MAX_PTO_COUNT).
 *
 * If no ACK-eliciting bytes are in flight, the deadline is measured from the
 * current time and the space is Handshake if the Initial space has been
 * discarded, else Initial. Otherwise the earliest deadline over the eligible
 * PN spaces is returned; if none is eligible the result is infinite and
 * *space is QUIC_PN_SPACE_INITIAL.
 */
static OSSL_TIME ackm_get_pto_time_and_space(OSSL_ACKM *ackm, int *space)
{
    OSSL_RTT_INFO rtt;
    OSSL_TIME duration, candidate;
    OSSL_TIME earliest = ossl_time_infinite();
    int earliest_space = QUIC_PN_SPACE_INITIAL, pn_space;
    uint64_t backoff;

    ossl_statm_get_rtt_info(ackm->statm, &rtt);

    /* Exponential backoff factor, capped via MAX_PTO_COUNT. */
    backoff = (uint64_t)1 << min_u32(ackm->pto_count, MAX_PTO_COUNT);

    duration = ossl_time_max(ossl_time_multiply(rtt.rtt_variance, 4),
                             ossl_ticks2time(K_GRANULARITY));
    duration = ossl_time_add(rtt.smoothed_rtt, duration);
    duration = ossl_time_multiply(duration, backoff);

    /* Anti-deadlock PTO starts from the current time. */
    if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0) {
        assert(!ackm->peer_completed_addr_validation);

        if (ackm->discarded[QUIC_PN_SPACE_INITIAL])
            *space = QUIC_PN_SPACE_HANDSHAKE;
        else
            *space = QUIC_PN_SPACE_INITIAL;

        return ossl_time_add(ackm->now(ackm->now_arg), duration);
    }

    for (pn_space = QUIC_PN_SPACE_INITIAL; pn_space < QUIC_PN_SPACE_NUM;
         ++pn_space) {
        /*
         * RFC 9002 section 6.2.2.1 keep probe timeout armed until
         * handshake is confirmed (client sees HANDSHAKE_DONE message
         * from server).
         */
        if (ackm->ack_eliciting_bytes_in_flight[pn_space] == 0
            && (ackm->handshake_confirmed == 1 || ackm->is_server == 1))
            continue;

        if (pn_space == QUIC_PN_SPACE_APP) {
            /* Application data is not considered until handshake confirmed. */
            if (!ackm->handshake_confirmed)
                break;

            /* App data also allows for the (backed-off) max_ack_delay. */
            if (!ossl_time_is_infinite(ackm->rx_max_ack_delay))
                duration
                    = ossl_time_add(duration,
                                    ossl_time_multiply(ackm->rx_max_ack_delay,
                                                       backoff));
        }

        /*
         * A zero timestamp means no ACK-eliciting packet has been recorded
         * for this space, so there is nothing to base a deadline on; using it
         * would place the timer in the past.
         */
        if (ossl_time_is_zero(ackm->time_of_last_ack_eliciting_pkt[pn_space]))
            continue;

        candidate
            = ossl_time_add(ackm->time_of_last_ack_eliciting_pkt[pn_space],
                            duration);
        if (ossl_time_compare(candidate, earliest) < 0) {
            earliest = candidate;
            earliest_space = pn_space;
        }
    }

    *space = earliest_space;
    return earliest;
}
904
905
static void ackm_set_loss_detection_timer_actual(OSSL_ACKM *ackm,
906
OSSL_TIME deadline)
907
{
908
ackm->loss_detection_deadline = deadline;
909
910
if (ackm->loss_detection_deadline_cb != NULL)
911
ackm->loss_detection_deadline_cb(deadline,
912
ackm->loss_detection_deadline_cb_arg);
913
}
914
915
/*
 * Recompute and store the loss detection deadline.
 *
 * Precedence:
 *   1. A pending time-threshold loss time, if any.
 *   2. Zero (no timer) if no ACK-eliciting bytes are in flight and the peer
 *      has completed address validation.
 *   3. Otherwise, the PTO deadline.
 *
 * Always returns 1.
 */
static int ackm_set_loss_detection_timer(OSSL_ACKM *ackm)
{
    int space;
    OSSL_TIME deadline;

    /* Time threshold loss detection takes priority when armed. */
    deadline = ackm_get_loss_time_and_space(ackm, &space);

    if (ossl_time_is_zero(deadline)) {
        if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0
            && ackm->peer_completed_addr_validation)
            /*
             * Nothing to detect lost, so no timer is set. However, the client
             * needs to arm the timer if the server might be blocked by the
             * anti-amplification limit (hence the address validation test).
             */
            deadline = ossl_time_zero();
        else
            deadline = ackm_get_pto_time_and_space(ackm, &space);
    }

    ackm_set_loss_detection_timer_actual(ackm, deadline);
    return 1;
}
942
943
static int ackm_in_persistent_congestion(OSSL_ACKM *ackm,
944
const OSSL_ACKM_TX_PKT *lpkt)
945
{
946
/* TODO(QUIC FUTURE): Persistent congestion not currently implemented. */
947
return 0;
948
}
949
950
static void ackm_on_pkts_lost(OSSL_ACKM *ackm, int pkt_space,
951
const OSSL_ACKM_TX_PKT *lpkt, int pseudo)
952
{
953
const OSSL_ACKM_TX_PKT *p, *pnext;
954
OSSL_RTT_INFO rtt;
955
QUIC_PN largest_pn_lost = 0;
956
OSSL_CC_LOSS_INFO loss_info = { 0 };
957
uint32_t flags = 0;
958
959
for (p = lpkt; p != NULL; p = pnext) {
960
pnext = p->lnext;
961
962
if (p->is_inflight) {
963
ackm->bytes_in_flight -= p->num_bytes;
964
if (p->is_ack_eliciting)
965
ackm->ack_eliciting_bytes_in_flight[p->pkt_space]
966
-= p->num_bytes;
967
968
if (p->pkt_num > largest_pn_lost)
969
largest_pn_lost = p->pkt_num;
970
971
if (!pseudo) {
972
/*
973
* If this is pseudo-loss (e.g. during connection retry) we do not
974
* inform the CC as it is not a real loss and not reflective of
975
* network conditions.
976
*/
977
loss_info.tx_time = p->time;
978
loss_info.tx_size = p->num_bytes;
979
980
ackm->cc_method->on_data_lost(ackm->cc_data, &loss_info);
981
}
982
}
983
984
p->on_lost(p->cb_arg);
985
}
986
987
/*
988
* Persistent congestion can only be considered if we have gotten at least
989
* one RTT sample.
990
*/
991
ossl_statm_get_rtt_info(ackm->statm, &rtt);
992
if (!ossl_time_is_zero(ackm->first_rtt_sample)
993
&& ackm_in_persistent_congestion(ackm, lpkt))
994
flags |= OSSL_CC_LOST_FLAG_PERSISTENT_CONGESTION;
995
996
ackm->cc_method->on_data_lost_finished(ackm->cc_data, flags);
997
}
998
999
/*
 * Process a chain of newly acknowledged packets, linked through anext and
 * starting at apkt.
 *
 * For each in-flight packet the in-flight byte counters are reduced, the RX
 * history watermark is bumped past the packet's largest_acked value (when
 * valid), and the congestion controller is informed. Every packet's on_acked
 * callback is invoked.
 *
 * The on_acked callback may free the packet, so everything needed from the
 * packet (link, in-flight flag, CC info) is captured before the callback is
 * invoked and the packet is not dereferenced afterwards.
 */
static void ackm_on_pkts_acked(OSSL_ACKM *ackm, const OSSL_ACKM_TX_PKT *apkt)
{
    const OSSL_ACKM_TX_PKT *anext;
    OSSL_CC_ACK_INFO ainfo = { 0 };
    int is_inflight;

    for (; apkt != NULL; apkt = anext) {
        /* Snapshot fields needed after the callback below. */
        anext = apkt->anext;
        is_inflight = apkt->is_inflight;

        if (is_inflight) {
            ackm->bytes_in_flight -= apkt->num_bytes;
            if (apkt->is_ack_eliciting)
                ackm->ack_eliciting_bytes_in_flight[apkt->pkt_space]
                    -= apkt->num_bytes;

            if (apkt->largest_acked != QUIC_PN_INVALID)
                /*
                 * This can fail, but it is monotonic; worst case we try again
                 * next time.
                 */
                rx_pkt_history_bump_watermark(get_rx_history(ackm,
                                                             apkt->pkt_space),
                                              apkt->largest_acked + 1);
        }

        ainfo.tx_time = apkt->time;
        ainfo.tx_size = apkt->num_bytes;

        apkt->on_acked(apkt->cb_arg); /* may free apkt */

        /* Use the cached flag: apkt may no longer be valid here. */
        if (is_inflight)
            ackm->cc_method->on_data_acked(ackm->cc_data, &ainfo);
    }
}
1035
1036
OSSL_ACKM *ossl_ackm_new(OSSL_TIME (*now)(void *arg),
1037
void *now_arg,
1038
OSSL_STATM *statm,
1039
const OSSL_CC_METHOD *cc_method,
1040
OSSL_CC_DATA *cc_data,
1041
int is_server)
1042
{
1043
OSSL_ACKM *ackm;
1044
int i;
1045
1046
ackm = OPENSSL_zalloc(sizeof(OSSL_ACKM));
1047
if (ackm == NULL)
1048
return NULL;
1049
1050
for (i = 0; i < (int)OSSL_NELEM(ackm->tx_history); ++i) {
1051
ackm->largest_acked_pkt[i] = QUIC_PN_INVALID;
1052
ackm->rx_ack_flush_deadline[i] = ossl_time_infinite();
1053
if (tx_pkt_history_init(&ackm->tx_history[i]) < 1)
1054
goto err;
1055
}
1056
1057
for (i = 0; i < (int)OSSL_NELEM(ackm->rx_history); ++i)
1058
rx_pkt_history_init(&ackm->rx_history[i]);
1059
1060
ackm->now = now;
1061
ackm->now_arg = now_arg;
1062
ackm->statm = statm;
1063
ackm->cc_method = cc_method;
1064
ackm->cc_data = cc_data;
1065
ackm->is_server = (char)is_server;
1066
1067
ackm->rx_max_ack_delay = ossl_ms2time(QUIC_DEFAULT_MAX_ACK_DELAY);
1068
ackm->tx_max_ack_delay = DEFAULT_TX_MAX_ACK_DELAY;
1069
1070
return ackm;
1071
1072
err:
1073
while (--i >= 0)
1074
tx_pkt_history_destroy(&ackm->tx_history[i]);
1075
1076
OPENSSL_free(ackm);
1077
return NULL;
1078
}
1079
1080
/*
 * Free an ACK manager. Passing NULL is a no-op.
 *
 * TX and RX histories are destroyed only for PN spaces not flagged as
 * discarded.
 */
void ossl_ackm_free(OSSL_ACKM *ackm)
{
    size_t space;

    if (ackm == NULL)
        return;

    for (space = 0; space < OSSL_NELEM(ackm->tx_history); ++space) {
        if (ackm->discarded[space])
            continue;

        tx_pkt_history_destroy(&ackm->tx_history[space]);
        rx_pkt_history_destroy(&ackm->rx_history[space]);
    }

    OPENSSL_free(ackm);
}
1095
1096
/*
 * Register a transmitted packet with the ACK manager.
 *
 * The packet is rejected (return 0) if its time is zero or earlier than the
 * last ACK-eliciting packet recorded for its PN space, if it has zero bytes,
 * if it is ACK-eliciting but not in-flight, or if it cannot be added to the
 * TX history. For in-flight packets the in-flight accounting is updated, the
 * loss detection timer is recomputed and the congestion controller is told
 * about the bytes sent. Returns 1 on success.
 */
int ossl_ackm_on_tx_packet(OSSL_ACKM *ackm, OSSL_ACKM_TX_PKT *pkt)
{
    struct tx_pkt_history_st *history = get_tx_history(ackm, pkt->pkt_space);

    /* Time must be set... */
    if (ossl_time_is_zero(pkt->time))
        return 0;

    /* ...and must not move backwards. */
    if (ossl_time_compare(ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space],
                          pkt->time)
        > 0)
        return 0;

    /* Must have non-zero number of bytes. */
    if (pkt->num_bytes == 0)
        return 0;

    /* Does not make any sense for a non-in-flight packet to be ACK-eliciting. */
    if (pkt->is_ack_eliciting && !pkt->is_inflight)
        return 0;

    if (tx_pkt_history_add(history, pkt) == 0)
        return 0;

    /* Packets which are not in flight need no further accounting. */
    if (!pkt->is_inflight)
        return 1;

    if (pkt->is_ack_eliciting) {
        ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space] = pkt->time;
        ackm->ack_eliciting_bytes_in_flight[pkt->pkt_space]
            += pkt->num_bytes;
    }

    ackm->bytes_in_flight += pkt->num_bytes;
    ackm_set_loss_detection_timer(ackm);

    ackm->cc_method->on_data_sent(ackm->cc_data, pkt->num_bytes);

    return 1;
}
1133
1134
/*
 * Notification that a datagram of num_bytes was received. Neither argument is
 * examined here; the function always returns 1.
 */
int ossl_ackm_on_rx_datagram(OSSL_ACKM *ackm, size_t num_bytes)
{
    const int ok = 1;

    /* No-op on the client. */
    return ok;
}
1139
1140
/*
 * Process the ECN counts carried in an ACK frame.
 *
 * If the peer's reported ECN-CE count has grown beyond the value previously
 * recorded for this PN space, the new value is recorded and, provided the
 * packet with the largest acknowledged PN is still in the TX history, the
 * congestion controller is notified with that packet's send time.
 */
static void ackm_process_ecn(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack,
                             int pkt_space)
{
    OSSL_ACKM_TX_PKT *largest_acked;
    OSSL_CC_ECN_INFO ecn_info = { 0 };

    /*
     * Only an increase in the ECN-CE counter reported by the peer could
     * indicate a new congestion event.
     */
    if (ack->ecnce <= ackm->peer_ecnce[pkt_space])
        return;

    ackm->peer_ecnce[pkt_space] = ack->ecnce;

    largest_acked = tx_pkt_history_by_pkt_num(get_tx_history(ackm, pkt_space),
                                              ack->ack_ranges[0].end);
    if (largest_acked == NULL)
        return;

    ecn_info.largest_acked_time = largest_acked->time;
    ackm->cc_method->on_ecn(ackm->cc_data, &ecn_info);
}
1163
1164
/*
 * Process a received ACK frame for the given PN space.
 *
 * Updates the largest acknowledged PN, removes newly acknowledged packets
 * from the TX history, takes an RTT sample where applicable, processes ECN
 * information, performs loss detection and finally recomputes the loss
 * detection timer. The frame is expected to contain at least one ACK range
 * (ack_ranges[0] is read unconditionally). rx_time is not used here.
 *
 * Always returns 1.
 */
int ossl_ackm_on_rx_ack_frame(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack,
                              int pkt_space, OSSL_TIME rx_time)
{
    OSSL_ACKM_TX_PKT *newly_acked, *lost;
    int timer_update_required = 0;

    if (ackm->largest_acked_pkt[pkt_space] != QUIC_PN_INVALID)
        ackm->largest_acked_pkt[pkt_space]
            = ossl_quic_pn_max(ackm->largest_acked_pkt[pkt_space],
                               ack->ack_ranges[0].end);
    else
        ackm->largest_acked_pkt[pkt_space] = ack->ack_ranges[0].end;

    /*
     * An ACK in the handshake space means address validation is completed.
     * The timer must then be refreshed even if no packets were ACK'd.
     */
    if (pkt_space == QUIC_PN_SPACE_HANDSHAKE
        && !ackm->peer_completed_addr_validation) {
        ackm->peer_completed_addr_validation = 1;
        timer_update_required = 1;
    }

    /* Pull the newly acknowledged packets out of the TX history. */
    newly_acked = ackm_detect_and_remove_newly_acked_pkts(ackm, ack, pkt_space);
    if (newly_acked == NULL) {
        if (timer_update_required)
            ackm_set_loss_detection_timer(ackm);

        return 1;
    }

    /*
     * Update the RTT if the largest acknowledged is newly acked and at least
     * one ACK-eliciting packet was newly acked.
     *
     * First packet in the list is always the one with the largest PN.
     */
    if (newly_acked->pkt_num == ack->ack_ranges[0].end
        && ack_includes_ack_eliciting(newly_acked)) {
        OSSL_TIME now = ackm->now(ackm->now_arg);
        OSSL_TIME ack_delay = ack->delay_time;

        if (ossl_time_is_zero(ackm->first_rtt_sample))
            ackm->first_rtt_sample = now;

        /* Enforce maximum ACK delay. */
        if (ackm->handshake_confirmed)
            ack_delay = ossl_time_min(ack_delay, ackm->rx_max_ack_delay);

        ossl_statm_update_rtt(ackm->statm, ack_delay,
                              ossl_time_subtract(now, newly_acked->time));
    }

    /*
     * Process ECN information if present.
     *
     * We deliberately do most ECN processing in the ACKM rather than the
     * congestion controller to avoid having to give the congestion controller
     * access to ACKM internal state.
     */
    if (ack->ecn_present)
        ackm_process_ecn(ackm, ack, pkt_space);

    /* Handle inferred loss. */
    lost = ackm_detect_and_remove_lost_pkts(ackm, pkt_space);
    if (lost != NULL)
        ackm_on_pkts_lost(ackm, pkt_space, lost, /*pseudo=*/0);

    ackm_on_pkts_acked(ackm, newly_acked);

    /*
     * Reset pto_count unless the client is unsure if the server validated the
     * client's address.
     */
    if (ackm->peer_completed_addr_validation)
        ackm->pto_count = 0;

    ackm_set_loss_detection_timer(ackm);
    return 1;
}
1245
1246
/*
 * Discard a PN space.
 *
 * Returns 0 if the space was already discarded. Otherwise every packet still
 * in the TX history has its on_discarded callback invoked, the TX and RX
 * histories are destroyed, the congestion controller is told how many
 * in-flight bytes were invalidated (if any), the per-space timing state and
 * pto_count are reset, the loss detection timer is recomputed, and 1 is
 * returned. Discarding the Handshake space also marks peer address validation
 * as complete.
 */
int ossl_ackm_on_pkt_space_discarded(OSSL_ACKM *ackm, int pkt_space)
{
    OSSL_ACKM_TX_PKT *cur, *following;
    uint64_t invalidated = 0;

    if (ackm->discarded[pkt_space])
        return 0;

    if (pkt_space == QUIC_PN_SPACE_HANDSHAKE)
        ackm->peer_completed_addr_validation = 1;

    cur = ossl_list_tx_history_head(&get_tx_history(ackm, pkt_space)->packets);
    while (cur != NULL) {
        /* Fetch the successor before the callback, which may free cur. */
        following = ossl_list_tx_history_next(cur);

        if (cur->is_inflight) {
            ackm->bytes_in_flight -= cur->num_bytes;
            invalidated += cur->num_bytes;
        }

        cur->on_discarded(cur->cb_arg); /* may free cur */
        cur = following;
    }

    tx_pkt_history_destroy(&ackm->tx_history[pkt_space]);
    rx_pkt_history_destroy(&ackm->rx_history[pkt_space]);

    if (invalidated > 0)
        ackm->cc_method->on_data_invalidated(ackm->cc_data, invalidated);

    ackm->time_of_last_ack_eliciting_pkt[pkt_space] = ossl_time_zero();
    ackm->loss_time[pkt_space] = ossl_time_zero();
    ackm->pto_count = 0;
    ackm->discarded[pkt_space] = 1;
    ackm->ack_eliciting_bytes_in_flight[pkt_space] = 0;
    ackm_set_loss_detection_timer(ackm);
    return 1;
}
1283
1284
/*
 * Record that the handshake has been confirmed. This also marks peer address
 * validation as complete and recomputes the loss detection timer.
 *
 * Always returns 1.
 */
int ossl_ackm_on_handshake_confirmed(OSSL_ACKM *ackm)
{
    ackm->peer_completed_addr_validation = 1;
    ackm->handshake_confirmed = 1;

    ackm_set_loss_detection_timer(ackm);

    return 1;
}
1291
1292
/* Request one more anti-deadlock probe in the Handshake PN space. */
static void ackm_queue_probe_anti_deadlock_handshake(OSSL_ACKM *ackm)
{
    ackm->pending_probe.anti_deadlock_handshake += 1;
}
1296
1297
/* Request one more anti-deadlock probe in the Initial PN space. */
static void ackm_queue_probe_anti_deadlock_initial(OSSL_ACKM *ackm)
{
    ackm->pending_probe.anti_deadlock_initial += 1;
}
1301
1302
/* Request one more PTO probe in the given PN space. */
static void ackm_queue_probe(OSSL_ACKM *ackm, int pkt_space)
{
    /*
     * TODO(QUIC FUTURE): We are allowed to send either one or two probe
     * packets here.
     * Determine a strategy for when we should send two probe packets.
     */
    ackm->pending_probe.pto[pkt_space] += 1;
}
1311
1312
/*
 * Handle expiry of the loss detection timer.
 *
 * If a time-threshold loss time is pending, loss detection is run for the
 * corresponding PN space. Otherwise a probe is queued (an anti-deadlock probe
 * if no ACK-eliciting bytes are in flight, else a PTO probe) and pto_count is
 * incremented. In all cases the loss detection timer is recomputed.
 *
 * Always returns 1.
 */
int ossl_ackm_on_timeout(OSSL_ACKM *ackm)
{
    int pkt_space;
    OSSL_TIME loss_time;
    OSSL_ACKM_TX_PKT *lost;

    loss_time = ackm_get_loss_time_and_space(ackm, &pkt_space);
    if (!ossl_time_is_zero(loss_time)) {
        /* Time threshold loss detection. */
        lost = ackm_detect_and_remove_lost_pkts(ackm, pkt_space);
        if (lost != NULL)
            ackm_on_pkts_lost(ackm, pkt_space, lost, /*pseudo=*/0);

        ackm_set_loss_detection_timer(ackm);
        return 1;
    }

    if (ackm_ack_eliciting_bytes_in_flight(ackm) != 0) {
        /*
         * PTO. The user of the ACKM should send new data if available, else
         * retransmit old data, or if neither is available, send a single PING
         * frame.
         */
        ackm_get_pto_time_and_space(ackm, &pkt_space);
        ackm_queue_probe(ackm, pkt_space);
    } else {
        assert(!ackm->peer_completed_addr_validation);

        /*
         * Client sends an anti-deadlock packet: Initial is padded to earn more
         * anti-amplification credit. A handshake packet proves address
         * ownership.
         */
        if (!ackm->discarded[QUIC_PN_SPACE_INITIAL])
            ackm_queue_probe_anti_deadlock_initial(ackm);
        else
            ackm_queue_probe_anti_deadlock_handshake(ackm);
    }

    ++ackm->pto_count;
    ackm_set_loss_detection_timer(ackm);
    return 1;
}
1353
1354
/* Return the most recently stored loss detection deadline. */
OSSL_TIME ossl_ackm_get_loss_detection_deadline(OSSL_ACKM *ackm)
{
    OSSL_TIME deadline = ackm->loss_detection_deadline;

    return deadline;
}
1358
1359
/*
 * Return a pointer to the ACK manager's pending probe counters. The pointer
 * refers to storage inside the ACK manager object itself.
 */
OSSL_ACKM_PROBE_INFO *ossl_ackm_get0_probe_request(OSSL_ACKM *ackm)
{
    OSSL_ACKM_PROBE_INFO *probe = &ackm->pending_probe;

    return probe;
}
1363
1364
/*
 * Retrieve the packet number at the tail of the TX history for the given PN
 * space.
 *
 * Returns 1 and writes the PN to *pn if the TX history is non-empty;
 * otherwise returns 0 and leaves *pn untouched.
 */
int ossl_ackm_get_largest_unacked(OSSL_ACKM *ackm, int pkt_space, QUIC_PN *pn)
{
    struct tx_pkt_history_st *history = get_tx_history(ackm, pkt_space);
    OSSL_ACKM_TX_PKT *last = ossl_list_tx_history_tail(&history->packets);

    if (last == NULL)
        return 0;

    *pn = last->pkt_num;
    return 1;
}
1378
1379
/* Number of ACK-eliciting packets RX'd before we always emit an ACK. */
1380
#define PKTS_BEFORE_ACK 2
1381
1382
/*
1383
* Return 1 if emission of an ACK frame is currently desired.
1384
*
1385
* This occurs when one or more of the following conditions occurs:
1386
*
1387
* - We have flagged that we want to send an ACK frame
1388
* (for example, due to the packet threshold count being exceeded), or
1389
*
1390
* - We have exceeded the ACK flush deadline, meaning that
1391
* we have received at least one ACK-eliciting packet, but held off on
1392
* sending an ACK frame immediately in the hope that more ACK-eliciting
1393
* packets might come in, but not enough did and we are now requesting
1394
* transmission of an ACK frame anyway.
1395
*
1396
*/
1397
int ossl_ackm_is_ack_desired(OSSL_ACKM *ackm, int pkt_space)
1398
{
1399
return ackm->rx_ack_desired[pkt_space]
1400
|| (!ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space])
1401
&& ossl_time_compare(ackm->now(ackm->now_arg),
1402
ackm->rx_ack_flush_deadline[pkt_space])
1403
>= 0);
1404
}
1405
1406
/*
1407
* Returns 1 if an ACK frame matches a given packet number.
1408
*/
1409
static int ack_contains(const OSSL_QUIC_FRAME_ACK *ack, QUIC_PN pkt_num)
1410
{
1411
size_t i;
1412
1413
for (i = 0; i < ack->num_ack_ranges; ++i)
1414
if (range_contains(&ack->ack_ranges[i], pkt_num))
1415
return 1;
1416
1417
return 0;
1418
}
1419
1420
/*
1421
* Returns 1 iff a PN (which we have just received) was previously reported as
1422
* implied missing (by us, in an ACK frame we previously generated).
1423
*/
1424
static int ackm_is_missing(OSSL_ACKM *ackm, int pkt_space, QUIC_PN pkt_num)
1425
{
1426
/*
1427
* A PN is implied missing if it is not greater than the highest PN in our
1428
* generated ACK frame, but is not matched by the frame.
1429
*/
1430
return ackm->ack[pkt_space].num_ack_ranges > 0
1431
&& pkt_num <= ackm->ack[pkt_space].ack_ranges[0].end
1432
&& !ack_contains(&ackm->ack[pkt_space], pkt_num);
1433
}
1434
1435
/*
1436
* Returns 1 iff our RX of a PN newly establishes the implication of missing
1437
* packets.
1438
*/
1439
static int ackm_has_newly_missing(OSSL_ACKM *ackm, int pkt_space)
1440
{
1441
struct rx_pkt_history_st *h;
1442
1443
h = get_rx_history(ackm, pkt_space);
1444
1445
if (ossl_list_uint_set_is_empty(&h->set))
1446
return 0;
1447
1448
/*
1449
* The second condition here establishes that the highest PN range in our RX
1450
* history comprises only a single PN. If there is more than one, then this
1451
* function will have returned 1 during a previous call to
1452
* ossl_ackm_on_rx_packet assuming the third condition below was met. Thus
1453
* we only return 1 when the missing PN condition is newly established.
1454
*
1455
* The third condition here establishes that the highest PN range in our RX
1456
* history is beyond (and does not border) the highest PN we have yet
1457
* reported in any ACK frame. Thus there is a gap of at least one PN between
1458
* the PNs we have ACK'd previously and the PN we have just received.
1459
*/
1460
return ackm->ack[pkt_space].num_ack_ranges > 0
1461
&& ossl_list_uint_set_tail(&h->set)->range.start
1462
== ossl_list_uint_set_tail(&h->set)->range.end
1463
&& ossl_list_uint_set_tail(&h->set)->range.start
1464
> ackm->ack[pkt_space].ack_ranges[0].end + 1;
1465
}
1466
1467
static void ackm_set_flush_deadline(OSSL_ACKM *ackm, int pkt_space,
1468
OSSL_TIME deadline)
1469
{
1470
ackm->rx_ack_flush_deadline[pkt_space] = deadline;
1471
1472
if (ackm->ack_deadline_cb != NULL)
1473
ackm->ack_deadline_cb(ossl_ackm_get_ack_deadline(ackm, pkt_space),
1474
pkt_space, ackm->ack_deadline_cb_arg);
1475
}
1476
1477
/* Explicitly flags that we want to generate an ACK frame. */
1478
static void ackm_queue_ack(OSSL_ACKM *ackm, int pkt_space)
1479
{
1480
ackm->rx_ack_desired[pkt_space] = 1;
1481
1482
/* Cancel deadline. */
1483
ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite());
1484
}
1485
1486
static void ackm_on_rx_ack_eliciting(OSSL_ACKM *ackm,
1487
OSSL_TIME rx_time, int pkt_space,
1488
int was_missing)
1489
{
1490
OSSL_TIME tx_max_ack_delay;
1491
1492
if (ackm->rx_ack_desired[pkt_space])
1493
/* ACK generation already requested so nothing to do. */
1494
return;
1495
1496
++ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space];
1497
1498
if (!ackm->rx_ack_generated[pkt_space]
1499
|| was_missing
1500
|| ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space]
1501
>= PKTS_BEFORE_ACK
1502
|| ackm_has_newly_missing(ackm, pkt_space)) {
1503
/*
1504
* Either:
1505
*
1506
* - We have never yet generated an ACK frame, meaning that this
1507
* is the first ever packet received, which we should always
1508
* acknowledge immediately, or
1509
*
1510
* - We previously reported the PN that we have just received as
1511
* missing in a previous ACK frame (meaning that we should report
1512
* the fact that we now have it to the peer immediately), or
1513
*
1514
* - We have exceeded the ACK-eliciting packet threshold count
1515
* for the purposes of ACK coalescing, so request transmission
1516
* of an ACK frame, or
1517
*
1518
* - The PN we just received and added to our PN RX history
1519
* newly implies one or more missing PNs, in which case we should
1520
* inform the peer by sending an ACK frame immediately.
1521
*
1522
* We do not test the ACK flush deadline here because it is tested
1523
* separately in ossl_ackm_is_ack_desired.
1524
*/
1525
ackm_queue_ack(ackm, pkt_space);
1526
return;
1527
}
1528
1529
/*
1530
* Not emitting an ACK yet.
1531
*
1532
* Update the ACK flush deadline.
1533
*
1534
* RFC 9000 s. 13.2.1: "An endpoint MUST acknowledge all ack-eliciting
1535
* Initial and Handshake packets immediately"; don't delay ACK generation if
1536
* we are using the Initial or Handshake PN spaces.
1537
*/
1538
tx_max_ack_delay = ackm->tx_max_ack_delay;
1539
if (pkt_space == QUIC_PN_SPACE_INITIAL
1540
|| pkt_space == QUIC_PN_SPACE_HANDSHAKE)
1541
tx_max_ack_delay = ossl_time_zero();
1542
1543
if (ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space]))
1544
ackm_set_flush_deadline(ackm, pkt_space,
1545
ossl_time_add(rx_time, tx_max_ack_delay));
1546
else
1547
ackm_set_flush_deadline(ackm, pkt_space,
1548
ossl_time_min(ackm->rx_ack_flush_deadline[pkt_space],
1549
ossl_time_add(rx_time,
1550
tx_max_ack_delay)));
1551
}
1552
1553
/*
 * Register a received packet with the ACK manager.
 *
 * Packets whose PN is not processable (already processed or written off) are
 * ignored and 1 is returned. Otherwise the PN is added to the RX history, ACK
 * generation state is updated for ACK-eliciting packets and the ECN counters
 * are updated. Returns 0 only if the PN cannot be added to the RX history.
 */
int ossl_ackm_on_rx_packet(OSSL_ACKM *ackm, const OSSL_ACKM_RX_PKT *pkt)
{
    struct rx_pkt_history_st *history = get_rx_history(ackm, pkt->pkt_space);
    int previously_missing;

    /* PN has already been processed or written off, no-op. */
    if (ossl_ackm_is_rx_pn_processable(ackm, pkt->pkt_num, pkt->pkt_space) != 1)
        return 1;

    /*
     * Track the largest PN we have RX'd along with its receive time; this
     * feeds the ACK delay field of the ACK frames we generate.
     */
    if (pkt->pkt_num > ackm->rx_largest_pn[pkt->pkt_space]) {
        ackm->rx_largest_pn[pkt->pkt_space] = pkt->pkt_num;
        ackm->rx_largest_time[pkt->pkt_space] = pkt->time;
    }

    /*
     * This must be evaluated before the PN is added to the history. If the PN
     * was implied missing by an ACK frame we generated earlier, packet count
     * thresholds and coalescing delays are bypassed and a new ACK frame is
     * requested immediately.
     */
    previously_missing = ackm_is_missing(ackm, pkt->pkt_space, pkt->pkt_num);

    /* Record the PN among those we have not yet provably acked. */
    if (rx_pkt_history_add_pn(history, pkt->pkt_num) != 1)
        return 0;

    /*
     * Receiving this packet may or may not cause us to emit an ACK frame; we
     * may hold off if a threshold number of packets has not yet been received.
     */
    if (pkt->is_ack_eliciting)
        ackm_on_rx_ack_eliciting(ackm, pkt->time, pkt->pkt_space,
                                 previously_missing);

    /* Update the ECN counters according to which ECN signal we got, if any. */
    switch (pkt->ecn) {
    case OSSL_ACKM_ECN_ECT0:
        ackm->rx_ect0[pkt->pkt_space] += 1;
        break;
    case OSSL_ACKM_ECN_ECT1:
        ackm->rx_ect1[pkt->pkt_space] += 1;
        break;
    case OSSL_ACKM_ECN_ECNCE:
        ackm->rx_ecnce[pkt->pkt_space] += 1;
        break;
    default:
        break;
    }

    return 1;
}
1611
1612
static void ackm_fill_rx_ack_ranges(OSSL_ACKM *ackm, int pkt_space,
1613
OSSL_QUIC_FRAME_ACK *ack)
1614
{
1615
struct rx_pkt_history_st *h = get_rx_history(ackm, pkt_space);
1616
UINT_SET_ITEM *x;
1617
size_t i = 0;
1618
1619
/*
1620
* Copy out ranges from the PN set, starting at the end, until we reach our
1621
* maximum number of ranges.
1622
*/
1623
for (x = ossl_list_uint_set_tail(&h->set);
1624
x != NULL && i < OSSL_NELEM(ackm->ack_ranges);
1625
x = ossl_list_uint_set_prev(x), ++i) {
1626
ackm->ack_ranges[pkt_space][i].start = x->range.start;
1627
ackm->ack_ranges[pkt_space][i].end = x->range.end;
1628
}
1629
1630
ack->ack_ranges = ackm->ack_ranges[pkt_space];
1631
ack->num_ack_ranges = i;
1632
}
1633
1634
/*
 * Build the ACK frame for a PN space and return a pointer to it. The frame is
 * stored inside the ACK manager.
 *
 * The ACK ranges, delay time and ECN counters are filled in, and the ACK
 * generation state for the space is reset (packet count zeroed, ACK no longer
 * desired, flush deadline cancelled). A non-zero delay time is only reported
 * for the application data PN space.
 */
const OSSL_QUIC_FRAME_ACK *ossl_ackm_get_ack_frame(OSSL_ACKM *ackm,
                                                   int pkt_space)
{
    OSSL_QUIC_FRAME_ACK *frame = &ackm->ack[pkt_space];
    OSSL_TIME now = ackm->now(ackm->now_arg);

    ackm_fill_rx_ack_ranges(ackm, pkt_space, frame);

    /* Default to no delay; overridden below where a delay is reportable. */
    frame->delay_time = ossl_time_zero();
    if (pkt_space == QUIC_PN_SPACE_APP
        && !ossl_time_is_zero(ackm->rx_largest_time[pkt_space])
        && ossl_time_compare(now, ackm->rx_largest_time[pkt_space]) > 0)
        frame->delay_time
            = ossl_time_subtract(now, ackm->rx_largest_time[pkt_space]);

    frame->ect0 = ackm->rx_ect0[pkt_space];
    frame->ect1 = ackm->rx_ect1[pkt_space];
    frame->ecnce = ackm->rx_ecnce[pkt_space];
    frame->ecn_present = 1;

    ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space] = 0;

    ackm->rx_ack_generated[pkt_space] = 1;
    ackm->rx_ack_desired[pkt_space] = 0;
    ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite());
    return frame;
}
1661
1662
/*
 * Return the time by which an ACK frame should be generated for a PN space:
 * zero if an ACK is already desired, otherwise the stored flush deadline.
 */
OSSL_TIME ossl_ackm_get_ack_deadline(OSSL_ACKM *ackm, int pkt_space)
{
    if (!ackm->rx_ack_desired[pkt_space])
        return ackm->rx_ack_flush_deadline[pkt_space];

    /* Already desired, deadline is now. */
    return ossl_time_zero();
}
1670
1671
/*
 * Returns 1 if a received PN should be processed: it must not be below the RX
 * history watermark and must not already be present in the RX PN set.
 * Returns 0 otherwise.
 */
int ossl_ackm_is_rx_pn_processable(OSSL_ACKM *ackm, QUIC_PN pn, int pkt_space)
{
    struct rx_pkt_history_st *history = get_rx_history(ackm, pkt_space);

    if (pn < history->watermark)
        return 0;

    return ossl_uint_set_query(&history->set, pn) == 0;
}
1677
1678
void ossl_ackm_set_loss_detection_deadline_callback(OSSL_ACKM *ackm,
1679
void (*fn)(OSSL_TIME deadline,
1680
void *arg),
1681
void *arg)
1682
{
1683
ackm->loss_detection_deadline_cb = fn;
1684
ackm->loss_detection_deadline_cb_arg = arg;
1685
}
1686
1687
void ossl_ackm_set_ack_deadline_callback(OSSL_ACKM *ackm,
1688
void (*fn)(OSSL_TIME deadline,
1689
int pkt_space,
1690
void *arg),
1691
void *arg)
1692
{
1693
ackm->ack_deadline_cb = fn;
1694
ackm->ack_deadline_cb_arg = arg;
1695
}
1696
1697
/*
 * Treat a single transmitted packet as lost without informing the congestion
 * controller of a per-packet loss (a "pseudo" loss).
 *
 * Returns 0 if no packet with the given PN is in the TX history for the PN
 * space; otherwise the packet is removed from the history, handed to the loss
 * handler as a one-element chain, and 1 is returned.
 */
int ossl_ackm_mark_packet_pseudo_lost(OSSL_ACKM *ackm,
                                      int pkt_space, QUIC_PN pn)
{
    struct tx_pkt_history_st *history = get_tx_history(ackm, pkt_space);
    OSSL_ACKM_TX_PKT *target = tx_pkt_history_by_pkt_num(history, pn);

    if (target == NULL)
        return 0;

    tx_pkt_history_remove(history, target->pkt_num);

    /* Terminate the chain so that only this packet is processed. */
    target->lnext = NULL;
    ackm_on_pkts_lost(ackm, pkt_space, target, /*pseudo=*/1);
    return 1;
}
1712
1713
/*
 * Return the current PTO duration:
 *
 *   smoothed_rtt + max(4 * rtt_variance, K_GRANULARITY)
 *
 * plus rx_max_ack_delay, unless that value is infinite. No backoff is applied.
 */
OSSL_TIME ossl_ackm_get_pto_duration(OSSL_ACKM *ackm)
{
    OSSL_RTT_INFO rtt;
    OSSL_TIME variance_term, pto;

    ossl_statm_get_rtt_info(ackm->statm, &rtt);

    variance_term = ossl_time_max(ossl_time_multiply(rtt.rtt_variance, 4),
                                  ossl_ticks2time(K_GRANULARITY));
    pto = ossl_time_add(rtt.smoothed_rtt, variance_term);

    if (ossl_time_is_infinite(ackm->rx_max_ack_delay))
        return pto;

    return ossl_time_add(pto, ackm->rx_max_ack_delay);
}
1728
1729
/*
 * Return the largest PN recorded as acknowledged by the peer in the given PN
 * space. This is QUIC_PN_INVALID until an ACK frame has been processed for
 * the space.
 */
QUIC_PN ossl_ackm_get_largest_acked(OSSL_ACKM *ackm, int pkt_space)
{
    QUIC_PN largest = ackm->largest_acked_pkt[pkt_space];

    return largest;
}
1733
1734
/*
 * Set the maximum ACK delay applied on the RX side; it is used to clamp the
 * delay reported in received ACK frames and in PTO calculations.
 */
void ossl_ackm_set_rx_max_ack_delay(OSSL_ACKM *ackm, OSSL_TIME rx_max_ack_delay)
{
    OSSL_TIME *slot = &ackm->rx_max_ack_delay;

    *slot = rx_max_ack_delay;
}
1738
1739
/*
 * Set the maximum delay used when arming the ACK flush deadline for ACK
 * frames we generate.
 */
void ossl_ackm_set_tx_max_ack_delay(OSSL_ACKM *ackm, OSSL_TIME tx_max_ack_delay)
{
    OSSL_TIME *slot = &ackm->tx_max_ack_delay;

    *slot = tx_max_ack_delay;
}
1743
1744