Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/netinet/if_ether.c
102428 views
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (c) 1982, 1986, 1988, 1993
5
* The Regents of the University of California. All rights reserved.
6
*
7
* Redistribution and use in source and binary forms, with or without
8
* modification, are permitted provided that the following conditions
9
* are met:
10
* 1. Redistributions of source code must retain the above copyright
11
* notice, this list of conditions and the following disclaimer.
12
* 2. Redistributions in binary form must reproduce the above copyright
13
* notice, this list of conditions and the following disclaimer in the
14
* documentation and/or other materials provided with the distribution.
15
* 3. Neither the name of the University nor the names of its contributors
16
* may be used to endorse or promote products derived from this software
17
* without specific prior written permission.
18
*
19
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29
* SUCH DAMAGE.
30
*/
31
32
/*
33
* Ethernet address resolution protocol.
34
* TODO:
35
* add "inuse/lock" bit (or ref. count) along with valid bit
36
*/
37
38
#include "opt_inet.h"
39
40
#include <sys/param.h>
41
#include <sys/eventhandler.h>
42
#include <sys/kernel.h>
43
#include <sys/lock.h>
44
#include <sys/queue.h>
45
#include <sys/sysctl.h>
46
#include <sys/systm.h>
47
#include <sys/mbuf.h>
48
#include <sys/malloc.h>
49
#include <sys/proc.h>
50
#include <sys/socket.h>
51
#include <sys/syslog.h>
52
53
#include <net/if.h>
54
#include <net/if_var.h>
55
#include <net/if_dl.h>
56
#include <net/if_private.h>
57
#include <net/if_types.h>
58
#include <net/if_bridgevar.h>
59
#include <net/netisr.h>
60
#include <net/ethernet.h>
61
#include <net/route.h>
62
#include <net/route/nhop.h>
63
#include <net/vnet.h>
64
65
#include <netinet/in.h>
66
#include <netinet/in_fib.h>
67
#include <netinet/in_var.h>
68
#include <net/if_llatbl.h>
69
#include <netinet/if_ether.h>
70
#ifdef INET
71
#include <netinet/ip_carp.h>
72
#endif
73
74
#include <security/mac/mac_framework.h>
75
76
#define SIN(s) ((const struct sockaddr_in *)(s))
77
78
static struct timeval arp_lastlog;
79
static int arp_curpps;
80
static int arp_maxpps = 1;
81
82
/* Simple ARP state machine */
83
enum arp_llinfo_state {
84
ARP_LLINFO_INCOMPLETE = 0, /* No LLE data */
85
ARP_LLINFO_REACHABLE, /* LLE is valid */
86
ARP_LLINFO_VERIFY, /* LLE is valid, need refresh */
87
ARP_LLINFO_DELETED, /* LLE is deleted */
88
};
89
90
SYSCTL_DECL(_net_link_ether);
91
static SYSCTL_NODE(_net_link_ether, PF_INET, inet,
92
CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
93
"");
94
static SYSCTL_NODE(_net_link_ether, PF_ARP, arp,
95
CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
96
"");
97
98
/* timer values */
99
VNET_DEFINE_STATIC(int, arpt_keep) = (20*60); /* once resolved, good for 20
100
* minutes */
101
VNET_DEFINE_STATIC(int, arp_maxtries) = 5;
102
VNET_DEFINE_STATIC(int, arp_proxyall) = 0;
103
VNET_DEFINE_STATIC(int, arpt_down) = 20; /* keep incomplete entries for
104
* 20 seconds */
105
VNET_DEFINE_STATIC(int, arpt_rexmit) = 1; /* retransmit arp entries, sec*/
106
VNET_PCPUSTAT_DEFINE(struct arpstat, arpstat); /* ARP statistics, see if_arp.h */
107
VNET_PCPUSTAT_SYSINIT(arpstat);
108
109
#ifdef VIMAGE
110
VNET_PCPUSTAT_SYSUNINIT(arpstat);
111
#endif /* VIMAGE */
112
113
VNET_DEFINE_STATIC(int, arp_maxhold) = 16;
114
115
#define V_arpt_keep VNET(arpt_keep)
116
#define V_arpt_down VNET(arpt_down)
117
#define V_arpt_rexmit VNET(arpt_rexmit)
118
#define V_arp_maxtries VNET(arp_maxtries)
119
#define V_arp_proxyall VNET(arp_proxyall)
120
#define V_arp_maxhold VNET(arp_maxhold)
121
122
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_age, CTLFLAG_VNET | CTLFLAG_RW,
123
&VNET_NAME(arpt_keep), 0,
124
"ARP entry lifetime in seconds");
125
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxtries, CTLFLAG_VNET | CTLFLAG_RW,
126
&VNET_NAME(arp_maxtries), 0,
127
"ARP resolution attempts before returning error");
128
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, proxyall, CTLFLAG_VNET | CTLFLAG_RW,
129
&VNET_NAME(arp_proxyall), 0,
130
"Enable proxy ARP for all suitable requests");
131
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, wait, CTLFLAG_VNET | CTLFLAG_RW,
132
&VNET_NAME(arpt_down), 0,
133
"Incomplete ARP entry lifetime in seconds");
134
SYSCTL_VNET_PCPUSTAT(_net_link_ether_arp, OID_AUTO, stats, struct arpstat,
135
arpstat, "ARP statistics (struct arpstat, net/if_arp.h)");
136
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, maxhold, CTLFLAG_VNET | CTLFLAG_RW,
137
&VNET_NAME(arp_maxhold), 0,
138
"Number of packets to hold per ARP entry");
139
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, max_log_per_second,
140
CTLFLAG_RW, &arp_maxpps, 0,
141
"Maximum number of remotely triggered ARP messages that can be "
142
"logged per second");
143
144
/*
145
* Due to the exponential backoff algorithm used for the interval between GARP
146
* retransmissions, the maximum number of retransmissions is limited for
147
* sanity. This limit corresponds to a maximum interval between retransmissions
148
* of 2^16 seconds ~= 18 hours.
149
*
150
* Making this limit more dynamic is more complicated than worthwhile,
151
* especially since sending out GARPs spaced days apart would be of little
152
* use. A maximum dynamic limit would look something like:
153
*
154
* const int max = fls(INT_MAX / hz) - 1;
155
*/
156
#define MAX_GARP_RETRANSMITS 16
157
static int sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS);
158
VNET_DEFINE_STATIC(int, garp_rexmit_count) = 0; /* GARP retransmission setting. */
159
#define V_garp_rexmit_count VNET(garp_rexmit_count)
160
161
SYSCTL_PROC(_net_link_ether_inet, OID_AUTO, garp_rexmit_count,
162
CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_MPSAFE|CTLFLAG_VNET,
163
&VNET_NAME(garp_rexmit_count), 0, sysctl_garp_rexmit, "I",
164
"Number of times to retransmit GARP packets;"
165
" 0 to disable, maximum of 16");
166
167
VNET_DEFINE_STATIC(int, arp_log_level) = LOG_INFO; /* Min. log(9) level. */
168
#define V_arp_log_level VNET(arp_log_level)
169
SYSCTL_INT(_net_link_ether_arp, OID_AUTO, log_level, CTLFLAG_VNET | CTLFLAG_RW,
170
&VNET_NAME(arp_log_level), 0,
171
"Minimum log(9) level for recording rate limited arp log messages. "
172
"The higher will be log more (emerg=0, info=6 (default), debug=7).");
173
#define ARP_LOG(pri, ...) do { \
174
if ((pri) <= V_arp_log_level && \
175
ppsratecheck(&arp_lastlog, &arp_curpps, arp_maxpps)) \
176
log((pri), "arp: " __VA_ARGS__); \
177
} while (0)
178
179
static void arpintr(struct mbuf *);
180
static void arptimer(void *);
181
#ifdef INET
182
static void in_arpinput(struct mbuf *);
183
#endif
184
185
static void arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr,
186
struct ifnet *ifp, int bridged, struct llentry *la);
187
static void arp_mark_lle_reachable(struct llentry *la, struct ifnet *ifp);
188
static void arp_iflladdr(void *arg __unused, struct ifnet *ifp);
189
190
static eventhandler_tag iflladdr_tag;
191
192
static const struct netisr_handler arp_nh = {
193
.nh_name = "arp",
194
.nh_handler = arpintr,
195
.nh_proto = NETISR_ARP,
196
.nh_policy = NETISR_POLICY_SOURCE,
197
};
198
199
/*
200
* Timeout routine. Age arp_tab entries periodically.
201
*/
202
static void
203
arptimer(void *arg)
204
{
205
struct llentry *lle = (struct llentry *)arg;
206
struct ifnet *ifp;
207
208
if (lle->la_flags & LLE_STATIC) {
209
return;
210
}
211
LLE_WLOCK(lle);
212
if (callout_pending(&lle->lle_timer)) {
213
/*
214
* Here we are a bit odd here in the treatment of
215
* active/pending. If the pending bit is set, it got
216
* rescheduled before I ran. The active
217
* bit we ignore, since if it was stopped
218
* in ll_tablefree() and was currently running
219
* it would have return 0 so the code would
220
* not have deleted it since the callout could
221
* not be stopped so we want to go through
222
* with the delete here now. If the callout
223
* was restarted, the pending bit will be back on and
224
* we just want to bail since the callout_reset would
225
* return 1 and our reference would have been removed
226
* by arpresolve() below.
227
*/
228
LLE_WUNLOCK(lle);
229
return;
230
}
231
ifp = lle->lle_tbl->llt_ifp;
232
CURVNET_SET(ifp->if_vnet);
233
234
switch (lle->ln_state) {
235
case ARP_LLINFO_REACHABLE:
236
237
/*
238
* Expiration time is approaching.
239
* Request usage feedback from the datapath.
240
* Change state and re-schedule ourselves.
241
*/
242
llentry_request_feedback(lle);
243
lle->ln_state = ARP_LLINFO_VERIFY;
244
callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
245
LLE_WUNLOCK(lle);
246
CURVNET_RESTORE();
247
return;
248
case ARP_LLINFO_VERIFY:
249
if (llentry_get_hittime(lle) > 0 && lle->la_preempt > 0) {
250
/* Entry was used, issue refresh request */
251
struct epoch_tracker et;
252
struct in_addr dst;
253
254
dst = lle->r_l3addr.addr4;
255
lle->la_preempt--;
256
callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
257
LLE_WUNLOCK(lle);
258
NET_EPOCH_ENTER(et);
259
arprequest(ifp, NULL, &dst, NULL);
260
NET_EPOCH_EXIT(et);
261
CURVNET_RESTORE();
262
return;
263
}
264
/* Nothing happened. Reschedule if not too late */
265
if (lle->la_expire > time_uptime) {
266
callout_schedule(&lle->lle_timer, hz * V_arpt_rexmit);
267
LLE_WUNLOCK(lle);
268
CURVNET_RESTORE();
269
return;
270
}
271
break;
272
case ARP_LLINFO_INCOMPLETE:
273
case ARP_LLINFO_DELETED:
274
break;
275
}
276
277
if ((lle->la_flags & LLE_DELETED) == 0) {
278
int evt;
279
280
if (lle->la_flags & LLE_VALID)
281
evt = LLENTRY_EXPIRED;
282
else
283
evt = LLENTRY_TIMEDOUT;
284
EVENTHANDLER_INVOKE(lle_event, lle, evt);
285
}
286
287
callout_stop(&lle->lle_timer);
288
289
/* XXX: LOR avoidance. We still have ref on lle. */
290
LLE_WUNLOCK(lle);
291
LLTABLE_LOCK(LLTABLE(ifp));
292
LLE_WLOCK(lle);
293
294
/* Guard against race with other llentry_free(). */
295
if (lle->la_flags & LLE_LINKED) {
296
LLE_REMREF(lle);
297
lltable_unlink_entry(lle->lle_tbl, lle);
298
}
299
LLTABLE_UNLOCK(LLTABLE(ifp));
300
301
size_t pkts_dropped = llentry_free(lle);
302
303
ARPSTAT_ADD(dropped, pkts_dropped);
304
ARPSTAT_INC(timeouts);
305
306
CURVNET_RESTORE();
307
}
308
309
/*
310
* Stores link-layer header for @ifp in format suitable for if_output()
311
* into buffer @buf. Resulting header length is stored in @bufsize.
312
*
313
* Returns 0 on success.
314
*/
315
static int
316
arp_fillheader(struct ifnet *ifp, struct arphdr *ah, int bcast, u_char *buf,
317
size_t *bufsize)
318
{
319
struct if_encap_req ereq;
320
int error;
321
322
bzero(buf, *bufsize);
323
bzero(&ereq, sizeof(ereq));
324
ereq.buf = buf;
325
ereq.bufsize = *bufsize;
326
ereq.rtype = IFENCAP_LL;
327
ereq.family = AF_ARP;
328
ereq.lladdr = ar_tha(ah);
329
ereq.hdata = (u_char *)ah;
330
if (bcast)
331
ereq.flags = IFENCAP_FLAG_BROADCAST;
332
error = ifp->if_requestencap(ifp, &ereq);
333
if (error == 0)
334
*bufsize = ereq.bufsize;
335
336
return (error);
337
}
338
339
/*
340
* Broadcast an ARP request. Caller specifies:
341
* - arp header source ip address
342
* - arp header target ip address
343
* - arp header source ethernet address
344
*/
345
static int
346
arprequest_internal(struct ifnet *ifp, const struct in_addr *sip,
347
const struct in_addr *tip, u_char *enaddr)
348
{
349
struct mbuf *m;
350
struct arphdr *ah;
351
struct sockaddr sa;
352
u_char *carpaddr = NULL;
353
uint8_t linkhdr[LLE_MAX_LINKHDR];
354
size_t linkhdrsize;
355
struct route ro;
356
int error;
357
358
NET_EPOCH_ASSERT();
359
360
if (sip == NULL) {
361
/*
362
* The caller did not supply a source address, try to find
363
* a compatible one among those assigned to this interface.
364
*/
365
struct ifaddr *ifa;
366
367
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
368
if (ifa->ifa_addr->sa_family != AF_INET)
369
continue;
370
371
if (ifa->ifa_carp) {
372
if ((*carp_iamatch_p)(ifa, &carpaddr) == 0)
373
continue;
374
sip = &IA_SIN(ifa)->sin_addr;
375
} else {
376
carpaddr = NULL;
377
sip = &IA_SIN(ifa)->sin_addr;
378
}
379
380
if (0 == ((sip->s_addr ^ tip->s_addr) &
381
IA_MASKSIN(ifa)->sin_addr.s_addr))
382
break; /* found it. */
383
}
384
if (sip == NULL) {
385
printf("%s: cannot find matching address\n", __func__);
386
return (EADDRNOTAVAIL);
387
}
388
}
389
if (enaddr == NULL)
390
enaddr = carpaddr ? carpaddr : (u_char *)IF_LLADDR(ifp);
391
392
if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
393
return (ENOMEM);
394
m->m_len = sizeof(*ah) + 2 * sizeof(struct in_addr) +
395
2 * ifp->if_addrlen;
396
m->m_pkthdr.len = m->m_len;
397
M_ALIGN(m, m->m_len);
398
ah = mtod(m, struct arphdr *);
399
bzero((caddr_t)ah, m->m_len);
400
#ifdef MAC
401
mac_netinet_arp_send(ifp, m);
402
#endif
403
ah->ar_pro = htons(ETHERTYPE_IP);
404
ah->ar_hln = ifp->if_addrlen; /* hardware address length */
405
ah->ar_pln = sizeof(struct in_addr); /* protocol address length */
406
ah->ar_op = htons(ARPOP_REQUEST);
407
bcopy(enaddr, ar_sha(ah), ah->ar_hln);
408
bcopy(sip, ar_spa(ah), ah->ar_pln);
409
bcopy(tip, ar_tpa(ah), ah->ar_pln);
410
sa.sa_family = AF_ARP;
411
sa.sa_len = 2;
412
413
/* Calculate link header for sending frame */
414
bzero(&ro, sizeof(ro));
415
linkhdrsize = sizeof(linkhdr);
416
error = arp_fillheader(ifp, ah, 1, linkhdr, &linkhdrsize);
417
if (error != 0 && error != EAFNOSUPPORT) {
418
m_freem(m);
419
ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n",
420
if_name(ifp), error);
421
return (error);
422
}
423
424
ro.ro_prepend = linkhdr;
425
ro.ro_plen = linkhdrsize;
426
ro.ro_flags = 0;
427
428
m->m_flags |= M_BCAST;
429
m_clrprotoflags(m); /* Avoid confusing lower layers. */
430
error = (*ifp->if_output)(ifp, m, &sa, &ro);
431
ARPSTAT_INC(txrequests);
432
if (error) {
433
ARPSTAT_INC(txerrors);
434
ARP_LOG(LOG_DEBUG, "Failed to send ARP packet on %s: %d\n",
435
if_name(ifp), error);
436
}
437
return (error);
438
}
439
440
void
441
arprequest(struct ifnet *ifp, const struct in_addr *sip,
442
const struct in_addr *tip, u_char *enaddr)
443
{
444
445
(void) arprequest_internal(ifp, sip, tip, enaddr);
446
}
447
448
/*
449
* Resolve an IP address into an ethernet address - heavy version.
450
* Used internally by arpresolve().
451
* We have already checked that we can't use an existing lle without
452
* modification so we have to acquire an LLE_EXCLUSIVE lle lock.
453
*
454
* On success, desten and pflags are filled in and the function returns 0;
455
* If the packet must be held pending resolution, we return EWOULDBLOCK
456
* On other errors, we return the corresponding error code.
457
* Note that m_freem() handles NULL.
458
*/
459
static int
460
arpresolve_full(struct ifnet *ifp, int is_gw, int flags, struct mbuf *m,
461
const struct sockaddr *dst, u_char *desten, uint32_t *pflags,
462
struct llentry **plle)
463
{
464
struct llentry *la = NULL, *la_tmp;
465
int error, renew;
466
char *lladdr;
467
int ll_len;
468
469
NET_EPOCH_ASSERT();
470
471
if (pflags != NULL)
472
*pflags = 0;
473
if (plle != NULL)
474
*plle = NULL;
475
476
if ((flags & LLE_CREATE) == 0)
477
la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
478
if (la == NULL && (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) == 0) {
479
la = lltable_alloc_entry(LLTABLE(ifp), 0, dst);
480
if (la == NULL) {
481
char addrbuf[INET_ADDRSTRLEN];
482
483
log(LOG_DEBUG,
484
"arpresolve: can't allocate llinfo for %s on %s\n",
485
inet_ntoa_r(SIN(dst)->sin_addr, addrbuf),
486
if_name(ifp));
487
m_freem(m);
488
return (EINVAL);
489
}
490
491
LLTABLE_LOCK(LLTABLE(ifp));
492
LLE_WLOCK(la);
493
la_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
494
/* Prefer ANY existing lle over newly-created one */
495
if (la_tmp == NULL)
496
lltable_link_entry(LLTABLE(ifp), la);
497
LLTABLE_UNLOCK(LLTABLE(ifp));
498
if (la_tmp != NULL) {
499
lltable_free_entry(LLTABLE(ifp), la);
500
la = la_tmp;
501
}
502
}
503
if (la == NULL) {
504
m_freem(m);
505
return (EINVAL);
506
}
507
508
if ((la->la_flags & LLE_VALID) &&
509
((la->la_flags & LLE_STATIC) || la->la_expire > time_uptime)) {
510
if (flags & LLE_ADDRONLY) {
511
lladdr = la->ll_addr;
512
ll_len = ifp->if_addrlen;
513
} else {
514
lladdr = la->r_linkdata;
515
ll_len = la->r_hdrlen;
516
}
517
bcopy(lladdr, desten, ll_len);
518
519
/* Notify LLE code that the entry was used by datapath */
520
llentry_provide_feedback(la);
521
if (pflags != NULL)
522
*pflags = la->la_flags & (LLE_VALID|LLE_IFADDR);
523
if (plle) {
524
LLE_ADDREF(la);
525
*plle = la;
526
}
527
LLE_WUNLOCK(la);
528
return (0);
529
}
530
531
renew = (la->la_asked == 0 || la->la_expire != time_uptime);
532
533
/*
534
* There is an arptab entry, but no ethernet address
535
* response yet. Add the mbuf to the list, dropping
536
* the oldest packet if we have exceeded the system
537
* setting.
538
*/
539
if (m != NULL) {
540
size_t dropped = lltable_append_entry_queue(la, m, V_arp_maxhold);
541
ARPSTAT_ADD(dropped, dropped);
542
}
543
544
/*
545
* Return EWOULDBLOCK if we have tried less than arp_maxtries. It
546
* will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH
547
* if we have already sent arp_maxtries ARP requests. Retransmit the
548
* ARP request, but not faster than one request per second.
549
*/
550
if (la->la_asked < V_arp_maxtries)
551
error = EWOULDBLOCK; /* First request. */
552
else
553
error = is_gw != 0 ? EHOSTUNREACH : EHOSTDOWN;
554
555
if (renew) {
556
int canceled, e;
557
558
LLE_ADDREF(la);
559
la->la_expire = time_uptime;
560
canceled = callout_reset(&la->lle_timer, hz * V_arpt_down,
561
arptimer, la);
562
if (canceled)
563
LLE_REMREF(la);
564
la->la_asked++;
565
LLE_WUNLOCK(la);
566
e = arprequest_internal(ifp, NULL, &SIN(dst)->sin_addr, NULL);
567
/*
568
* Only overwrite 'error' in case of error; in case of success
569
* the proper return value was already set above.
570
*/
571
if (e != 0)
572
return (e);
573
return (error);
574
}
575
576
LLE_WUNLOCK(la);
577
return (error);
578
}
579
580
/*
581
* Lookups link header based on an IP address.
582
* On input:
583
* ifp is the interface we use
584
* is_gw != 0 if @dst represents gateway to some destination
585
* m is the mbuf. May be NULL if we don't have a packet.
586
* dst is the next hop,
587
* desten is the storage to put LL header.
588
* flags returns subset of lle flags: LLE_VALID | LLE_IFADDR
589
*
590
* On success, full/partial link header and flags are filled in and
591
* the function returns 0.
592
* If the packet must be held pending resolution, we return EWOULDBLOCK
593
* On other errors, we return the corresponding error code.
594
* Note that m_freem() handles NULL.
595
*/
596
int
597
arpresolve(struct ifnet *ifp, int is_gw, struct mbuf *m,
598
const struct sockaddr *dst, u_char *desten, uint32_t *pflags,
599
struct llentry **plle)
600
{
601
struct llentry *la = NULL;
602
603
NET_EPOCH_ASSERT();
604
605
if (pflags != NULL)
606
*pflags = 0;
607
if (plle != NULL)
608
*plle = NULL;
609
610
if (m != NULL) {
611
if (m->m_flags & M_BCAST) {
612
/* broadcast */
613
(void)memcpy(desten,
614
ifp->if_broadcastaddr, ifp->if_addrlen);
615
return (0);
616
}
617
if (m->m_flags & M_MCAST) {
618
/* multicast */
619
ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
620
return (0);
621
}
622
}
623
624
la = lla_lookup(LLTABLE(ifp), plle ? LLE_EXCLUSIVE : LLE_UNLOCKED, dst);
625
if (la != NULL && (la->r_flags & RLLE_VALID) != 0) {
626
/* Entry found, let's copy lle info */
627
bcopy(la->r_linkdata, desten, la->r_hdrlen);
628
if (pflags != NULL)
629
*pflags = LLE_VALID | (la->r_flags & RLLE_IFADDR);
630
/* Notify the LLE handling code that the entry was used. */
631
llentry_provide_feedback(la);
632
if (plle) {
633
LLE_ADDREF(la);
634
*plle = la;
635
LLE_WUNLOCK(la);
636
}
637
return (0);
638
}
639
if (plle && la)
640
LLE_WUNLOCK(la);
641
642
return (arpresolve_full(ifp, is_gw, la == NULL ? LLE_CREATE : 0, m, dst,
643
desten, pflags, plle));
644
}
645
646
/*
647
* Common length and type checks are done here,
648
* then the protocol-specific routine is called.
649
*/
650
static void
651
arpintr(struct mbuf *m)
652
{
653
struct arphdr *ar;
654
struct ifnet *ifp;
655
char *layer;
656
int hlen;
657
658
ifp = m->m_pkthdr.rcvif;
659
660
if (m->m_len < sizeof(struct arphdr) &&
661
((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) {
662
ARP_LOG(LOG_NOTICE, "packet with short header received on %s\n",
663
if_name(ifp));
664
return;
665
}
666
ar = mtod(m, struct arphdr *);
667
668
/* Check if length is sufficient */
669
if (m->m_len < arphdr_len(ar)) {
670
m = m_pullup(m, arphdr_len(ar));
671
if (m == NULL) {
672
ARP_LOG(LOG_NOTICE, "short packet received on %s\n",
673
if_name(ifp));
674
return;
675
}
676
ar = mtod(m, struct arphdr *);
677
}
678
679
hlen = 0;
680
layer = "";
681
switch (ntohs(ar->ar_hrd)) {
682
case ARPHRD_ETHER:
683
hlen = ETHER_ADDR_LEN; /* RFC 826 */
684
layer = "ethernet";
685
break;
686
case ARPHRD_IEEE802:
687
hlen = ETHER_ADDR_LEN;
688
layer = "ieee802";
689
break;
690
case ARPHRD_INFINIBAND:
691
hlen = 20; /* RFC 4391, INFINIBAND_ALEN */
692
layer = "infiniband";
693
break;
694
case ARPHRD_IEEE1394:
695
hlen = 0; /* SHALL be 16 */ /* RFC 2734 */
696
layer = "firewire";
697
698
/*
699
* Restrict too long hardware addresses.
700
* Currently we are capable of handling 20-byte
701
* addresses ( sizeof(lle->ll_addr) )
702
*/
703
if (ar->ar_hln >= 20)
704
hlen = 16;
705
break;
706
default:
707
ARP_LOG(LOG_NOTICE,
708
"packet with unknown hardware format 0x%02d received on "
709
"%s\n", ntohs(ar->ar_hrd), if_name(ifp));
710
m_freem(m);
711
return;
712
}
713
714
if (hlen != 0 && hlen != ar->ar_hln) {
715
ARP_LOG(LOG_NOTICE,
716
"packet with invalid %s address length %d received on %s\n",
717
layer, ar->ar_hln, if_name(ifp));
718
m_freem(m);
719
return;
720
}
721
722
ARPSTAT_INC(received);
723
switch (ntohs(ar->ar_pro)) {
724
#ifdef INET
725
case ETHERTYPE_IP:
726
in_arpinput(m);
727
return;
728
#endif
729
}
730
m_freem(m);
731
}
732
733
#ifdef INET
734
/*
735
* ARP for Internet protocols on 10 Mb/s Ethernet.
736
* Algorithm is that given in RFC 826.
737
* In addition, a sanity check is performed on the sender
738
* protocol address, to catch impersonators.
739
* We no longer handle negotiations for use of trailer protocol:
740
* Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
741
* along with IP replies if we wanted trailers sent to us,
742
* and also sent them in response to IP replies.
743
* This allowed either end to announce the desire to receive
744
* trailer packets.
745
* We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
746
* but formerly didn't normally send requests.
747
*/
748
static int log_arp_wrong_iface = 1;
749
static int log_arp_movements = 1;
750
static int log_arp_permanent_modify = 1;
751
static int allow_multicast = 0;
752
753
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW,
754
&log_arp_wrong_iface, 0,
755
"log arp packets arriving on the wrong interface");
756
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_movements, CTLFLAG_RW,
757
&log_arp_movements, 0,
758
"log arp replies from MACs different than the one in the cache");
759
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_permanent_modify, CTLFLAG_RW,
760
&log_arp_permanent_modify, 0,
761
"log arp replies from MACs different than the one in the permanent arp entry");
762
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, allow_multicast, CTLFLAG_RW,
763
&allow_multicast, 0, "accept multicast addresses");
764
765
static void
766
in_arpinput(struct mbuf *m)
767
{
768
struct arphdr *ah;
769
struct ifnet *ifp = m->m_pkthdr.rcvif;
770
struct llentry *la = NULL, *la_tmp;
771
struct ifaddr *ifa;
772
struct in_ifaddr *ia;
773
struct sockaddr sa;
774
struct in_addr isaddr, itaddr, myaddr;
775
u_int8_t *enaddr = NULL;
776
int op;
777
int bridged = 0, is_bridge = 0;
778
int carped;
779
struct sockaddr_in sin;
780
struct sockaddr *dst;
781
struct nhop_object *nh;
782
uint8_t linkhdr[LLE_MAX_LINKHDR];
783
struct route ro;
784
size_t linkhdrsize;
785
int lladdr_off;
786
int error;
787
char addrbuf[INET_ADDRSTRLEN];
788
789
NET_EPOCH_ASSERT();
790
791
sin.sin_len = sizeof(struct sockaddr_in);
792
sin.sin_family = AF_INET;
793
sin.sin_addr.s_addr = 0;
794
795
if (ifp->if_bridge)
796
bridged = 1;
797
if (ifp->if_type == IFT_BRIDGE)
798
is_bridge = 1;
799
800
/*
801
* We already have checked that mbuf contains enough contiguous data
802
* to hold entire arp message according to the arp header.
803
*/
804
ah = mtod(m, struct arphdr *);
805
806
/*
807
* ARP is only for IPv4 so we can reject packets with
808
* a protocol length not equal to an IPv4 address.
809
*/
810
if (ah->ar_pln != sizeof(struct in_addr)) {
811
ARP_LOG(LOG_NOTICE, "requested protocol length != %zu\n",
812
sizeof(struct in_addr));
813
goto drop;
814
}
815
816
if (allow_multicast == 0 && ETHER_IS_MULTICAST(ar_sha(ah))) {
817
ARP_LOG(LOG_NOTICE, "%*D is multicast\n",
818
ifp->if_addrlen, (u_char *)ar_sha(ah), ":");
819
goto drop;
820
}
821
822
op = ntohs(ah->ar_op);
823
(void)memcpy(&isaddr, ar_spa(ah), sizeof (isaddr));
824
(void)memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr));
825
826
if (op == ARPOP_REPLY)
827
ARPSTAT_INC(rxreplies);
828
829
/*
830
* For a bridge, we want to check the address irrespective
831
* of the receive interface. (This will change slightly
832
* when we have clusters of interfaces).
833
*/
834
CK_LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
835
if (((bridged && bridge_same_p(ia->ia_ifp->if_bridge, ifp->if_bridge)) ||
836
ia->ia_ifp == ifp) &&
837
itaddr.s_addr == ia->ia_addr.sin_addr.s_addr &&
838
(ia->ia_ifa.ifa_carp == NULL ||
839
(*carp_iamatch_p)(&ia->ia_ifa, &enaddr))) {
840
ifa_ref(&ia->ia_ifa);
841
goto match;
842
}
843
}
844
CK_LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
845
if (((bridged && bridge_same_p(ia->ia_ifp->if_bridge, ifp->if_bridge)) ||
846
ia->ia_ifp == ifp) &&
847
isaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
848
ifa_ref(&ia->ia_ifa);
849
goto match;
850
}
851
852
#define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia) \
853
(bridge_get_softc_p(ia->ia_ifp) == ifp->if_softc && \
854
!bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) && \
855
addr == ia->ia_addr.sin_addr.s_addr)
856
/*
857
* Check the case when bridge shares its MAC address with
858
* some of its children, so packets are claimed by bridge
859
* itself (bridge_input() does it first), but they are really
860
* meant to be destined to the bridge member.
861
*/
862
if (is_bridge) {
863
CK_LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
864
if (BDG_MEMBER_MATCHES_ARP(itaddr.s_addr, ifp, ia)) {
865
ifa_ref(&ia->ia_ifa);
866
ifp = ia->ia_ifp;
867
goto match;
868
}
869
}
870
}
871
#undef BDG_MEMBER_MATCHES_ARP
872
873
/*
874
* No match, use the first inet address on the receive interface
875
* as a dummy address for the rest of the function.
876
*/
877
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
878
if (ifa->ifa_addr->sa_family == AF_INET &&
879
(ifa->ifa_carp == NULL ||
880
(*carp_iamatch_p)(ifa, &enaddr))) {
881
ia = ifatoia(ifa);
882
ifa_ref(ifa);
883
goto match;
884
}
885
886
/*
887
* If bridging, fall back to using any inet address.
888
*/
889
if (!bridged || (ia = CK_STAILQ_FIRST(&V_in_ifaddrhead)) == NULL)
890
goto drop;
891
ifa_ref(&ia->ia_ifa);
892
match:
893
if (!enaddr)
894
enaddr = (u_int8_t *)IF_LLADDR(ifp);
895
carped = (ia->ia_ifa.ifa_carp != NULL);
896
myaddr = ia->ia_addr.sin_addr;
897
ifa_free(&ia->ia_ifa);
898
if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
899
goto drop; /* it's from me, ignore it. */
900
if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
901
ARP_LOG(LOG_NOTICE, "link address is broadcast for IP address "
902
"%s!\n", inet_ntoa_r(isaddr, addrbuf));
903
goto drop;
904
}
905
906
if (ifp->if_addrlen != ah->ar_hln) {
907
ARP_LOG(LOG_WARNING, "from %*D: addr len: new %d, "
908
"i/f %d (ignored)\n", ifp->if_addrlen,
909
(u_char *) ar_sha(ah), ":", ah->ar_hln,
910
ifp->if_addrlen);
911
goto drop;
912
}
913
914
/*
915
* Warn if another host is using the same IP address, but only if the
916
* IP address isn't 0.0.0.0, which is used for DHCP only, in which
917
* case we suppress the warning to avoid false positive complaints of
918
* potential misconfiguration.
919
*/
920
if (!bridged && !carped && isaddr.s_addr == myaddr.s_addr &&
921
myaddr.s_addr != 0) {
922
ARP_LOG(LOG_ERR, "%*D is using my IP address %s on %s!\n",
923
ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
924
inet_ntoa_r(isaddr, addrbuf), ifp->if_xname);
925
itaddr = myaddr;
926
ARPSTAT_INC(dupips);
927
goto reply;
928
}
929
if (ifp->if_flags & IFF_STATICARP)
930
goto reply;
931
932
bzero(&sin, sizeof(sin));
933
sin.sin_len = sizeof(struct sockaddr_in);
934
sin.sin_family = AF_INET;
935
sin.sin_addr = isaddr;
936
dst = (struct sockaddr *)&sin;
937
la = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
938
if (la != NULL)
939
arp_check_update_lle(ah, isaddr, ifp, bridged, la);
940
else if (itaddr.s_addr == myaddr.s_addr) {
941
/*
942
* Request/reply to our address, but no lle exists yet.
943
* Calculate full link prepend to use in lle.
944
*/
945
linkhdrsize = sizeof(linkhdr);
946
if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
947
&linkhdrsize, &lladdr_off) != 0)
948
goto reply;
949
950
/* Allocate new entry */
951
la = lltable_alloc_entry(LLTABLE(ifp), 0, dst);
952
if (la == NULL) {
953
/*
954
* lle creation may fail if source address belongs
955
* to non-directly connected subnet. However, we
956
* will try to answer the request instead of dropping
957
* frame.
958
*/
959
goto reply;
960
}
961
lltable_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
962
lladdr_off);
963
964
LLTABLE_LOCK(LLTABLE(ifp));
965
LLE_WLOCK(la);
966
la_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
967
968
/*
969
* Check if lle still does not exists.
970
* If it does, that means that we either
971
* 1) have configured it explicitly, via
972
* 1a) 'arp -s' static entry or
973
* 1b) interface address static record
974
* or
975
* 2) it was the result of sending first packet to-host
976
* or
977
* 3) it was another arp reply packet we handled in
978
* different thread.
979
*
980
* In all cases except 3) we definitely need to prefer
981
* existing lle. For the sake of simplicity, prefer any
982
* existing lle over newly-create one.
983
*/
984
if (la_tmp == NULL)
985
lltable_link_entry(LLTABLE(ifp), la);
986
LLTABLE_UNLOCK(LLTABLE(ifp));
987
988
if (la_tmp == NULL) {
989
arp_mark_lle_reachable(la, ifp);
990
LLE_WUNLOCK(la);
991
} else {
992
/* Free newly-create entry and handle packet */
993
lltable_free_entry(LLTABLE(ifp), la);
994
la = la_tmp;
995
la_tmp = NULL;
996
arp_check_update_lle(ah, isaddr, ifp, bridged, la);
997
/* arp_check_update_lle() returns @la unlocked */
998
}
999
la = NULL;
1000
}
1001
reply:
1002
if (op != ARPOP_REQUEST)
1003
goto drop;
1004
ARPSTAT_INC(rxrequests);
1005
1006
if (itaddr.s_addr == myaddr.s_addr) {
1007
/* Shortcut.. the receiving interface is the target. */
1008
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
1009
(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
1010
} else {
1011
/*
1012
* Destination address is not ours. Check if
1013
* proxyarp entry exists or proxyarp is turned on globally.
1014
*/
1015
struct llentry *lle;
1016
1017
sin.sin_addr = itaddr;
1018
lle = lla_lookup(LLTABLE(ifp), 0, (struct sockaddr *)&sin);
1019
1020
if ((lle != NULL) && (lle->la_flags & LLE_PUB)) {
1021
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
1022
(void)memcpy(ar_sha(ah), lle->ll_addr, ah->ar_hln);
1023
LLE_RUNLOCK(lle);
1024
} else {
1025
if (lle != NULL)
1026
LLE_RUNLOCK(lle);
1027
1028
if (!V_arp_proxyall)
1029
goto drop;
1030
1031
NET_EPOCH_ASSERT();
1032
nh = fib4_lookup(ifp->if_fib, itaddr, 0, 0, 0);
1033
if (nh == NULL)
1034
goto drop;
1035
1036
/*
1037
* Don't send proxies for nodes on the same interface
1038
* as this one came out of, or we'll get into a fight
1039
* over who claims what Ether address.
1040
*/
1041
if (nh->nh_ifp == ifp)
1042
goto drop;
1043
1044
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
1045
(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
1046
1047
/*
1048
* Also check that the node which sent the ARP packet
1049
* is on the interface we expect it to be on. This
1050
* avoids ARP chaos if an interface is connected to the
1051
* wrong network.
1052
*/
1053
1054
nh = fib4_lookup(ifp->if_fib, isaddr, 0, 0, 0);
1055
if (nh == NULL)
1056
goto drop;
1057
if (nh->nh_ifp != ifp) {
1058
ARP_LOG(LOG_INFO, "proxy: ignoring request"
1059
" from %s via %s\n",
1060
inet_ntoa_r(isaddr, addrbuf),
1061
ifp->if_xname);
1062
goto drop;
1063
}
1064
1065
#ifdef DEBUG_PROXY
1066
printf("arp: proxying for %s\n",
1067
inet_ntoa_r(itaddr, addrbuf));
1068
#endif
1069
}
1070
}
1071
1072
if (itaddr.s_addr == myaddr.s_addr &&
1073
IN_LINKLOCAL(ntohl(itaddr.s_addr))) {
1074
/* RFC 3927 link-local IPv4; always reply by broadcast. */
1075
#ifdef DEBUG_LINKLOCAL
1076
printf("arp: sending reply for link-local addr %s\n",
1077
inet_ntoa_r(itaddr, addrbuf));
1078
#endif
1079
m->m_flags |= M_BCAST;
1080
m->m_flags &= ~M_MCAST;
1081
} else {
1082
/* default behaviour; never reply by broadcast. */
1083
m->m_flags &= ~(M_BCAST|M_MCAST);
1084
}
1085
(void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
1086
(void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
1087
ah->ar_op = htons(ARPOP_REPLY);
1088
ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
1089
m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln);
1090
m->m_pkthdr.len = m->m_len;
1091
m->m_pkthdr.rcvif = NULL;
1092
sa.sa_family = AF_ARP;
1093
sa.sa_len = 2;
1094
1095
/* Calculate link header for sending frame */
1096
bzero(&ro, sizeof(ro));
1097
linkhdrsize = sizeof(linkhdr);
1098
error = arp_fillheader(ifp, ah, 0, linkhdr, &linkhdrsize);
1099
1100
/*
1101
* arp_fillheader() may fail due to lack of support inside encap request
1102
* routing. This is not necessary an error, AF_ARP can/should be handled
1103
* by if_output().
1104
*/
1105
if (error != 0 && error != EAFNOSUPPORT) {
1106
ARP_LOG(LOG_ERR, "Failed to calculate ARP header on %s: %d\n",
1107
if_name(ifp), error);
1108
goto drop;
1109
}
1110
1111
ro.ro_prepend = linkhdr;
1112
ro.ro_plen = linkhdrsize;
1113
ro.ro_flags = 0;
1114
1115
m_clrprotoflags(m); /* Avoid confusing lower layers. */
1116
(*ifp->if_output)(ifp, m, &sa, &ro);
1117
ARPSTAT_INC(txreplies);
1118
return;
1119
1120
drop:
1121
m_freem(m);
1122
}
1123
#endif
1124
1125
static struct mbuf *
1126
arp_grab_holdchain(struct llentry *la)
1127
{
1128
struct mbuf *chain;
1129
1130
LLE_WLOCK_ASSERT(la);
1131
1132
chain = la->la_hold;
1133
la->la_hold = NULL;
1134
la->la_numheld = 0;
1135
1136
return (chain);
1137
}
1138
1139
static void
1140
arp_flush_holdchain(struct ifnet *ifp, struct llentry *la, struct mbuf *chain)
1141
{
1142
struct mbuf *m_hold, *m_hold_next;
1143
struct sockaddr_in sin;
1144
1145
NET_EPOCH_ASSERT();
1146
1147
struct route ro = {
1148
.ro_prepend = la->r_linkdata,
1149
.ro_plen = la->r_hdrlen,
1150
};
1151
1152
lltable_fill_sa_entry(la, (struct sockaddr *)&sin);
1153
1154
for (m_hold = chain; m_hold != NULL; m_hold = m_hold_next) {
1155
m_hold_next = m_hold->m_nextpkt;
1156
m_hold->m_nextpkt = NULL;
1157
/* Avoid confusing lower layers. */
1158
m_clrprotoflags(m_hold);
1159
(*ifp->if_output)(ifp, m_hold, (struct sockaddr *)&sin, &ro);
1160
}
1161
}
1162
1163
/*
1164
* Checks received arp data against existing @la.
1165
* Updates lle state/performs notification if necessary.
1166
*/
1167
static void
1168
arp_check_update_lle(struct arphdr *ah, struct in_addr isaddr, struct ifnet *ifp,
1169
int bridged, struct llentry *la)
1170
{
1171
uint8_t linkhdr[LLE_MAX_LINKHDR];
1172
size_t linkhdrsize;
1173
int lladdr_off;
1174
char addrbuf[INET_ADDRSTRLEN];
1175
1176
LLE_WLOCK_ASSERT(la);
1177
1178
/* the following is not an error when doing bridging */
1179
if (!bridged && la->lle_tbl->llt_ifp != ifp) {
1180
if (log_arp_wrong_iface)
1181
ARP_LOG(LOG_WARNING, "%s is on %s "
1182
"but got reply from %*D on %s\n",
1183
inet_ntoa_r(isaddr, addrbuf),
1184
la->lle_tbl->llt_ifp->if_xname,
1185
ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
1186
ifp->if_xname);
1187
LLE_WUNLOCK(la);
1188
return;
1189
}
1190
if ((la->la_flags & LLE_VALID) &&
1191
bcmp(ar_sha(ah), la->ll_addr, ifp->if_addrlen)) {
1192
if (la->la_flags & LLE_STATIC) {
1193
LLE_WUNLOCK(la);
1194
if (log_arp_permanent_modify)
1195
ARP_LOG(LOG_ERR,
1196
"%*D attempts to modify "
1197
"permanent entry for %s on %s\n",
1198
ifp->if_addrlen,
1199
(u_char *)ar_sha(ah), ":",
1200
inet_ntoa_r(isaddr, addrbuf),
1201
ifp->if_xname);
1202
return;
1203
}
1204
if (log_arp_movements) {
1205
ARP_LOG(LOG_INFO, "%s moved from %*D "
1206
"to %*D on %s\n",
1207
inet_ntoa_r(isaddr, addrbuf),
1208
ifp->if_addrlen,
1209
(u_char *)la->ll_addr, ":",
1210
ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
1211
ifp->if_xname);
1212
}
1213
}
1214
1215
/* Calculate full link prepend to use in lle */
1216
linkhdrsize = sizeof(linkhdr);
1217
if (lltable_calc_llheader(ifp, AF_INET, ar_sha(ah), linkhdr,
1218
&linkhdrsize, &lladdr_off) != 0) {
1219
LLE_WUNLOCK(la);
1220
return;
1221
}
1222
1223
/* Check if something has changed */
1224
if (memcmp(la->r_linkdata, linkhdr, linkhdrsize) != 0 ||
1225
(la->la_flags & LLE_VALID) == 0) {
1226
/* Try to perform LLE update */
1227
if (lltable_try_set_entry_addr(ifp, la, linkhdr, linkhdrsize,
1228
lladdr_off) == 0) {
1229
LLE_WUNLOCK(la);
1230
return;
1231
}
1232
1233
/* Clear fast path feedback request if set */
1234
llentry_mark_used(la);
1235
}
1236
1237
arp_mark_lle_reachable(la, ifp);
1238
1239
/*
1240
* The packets are all freed within the call to the output
1241
* routine.
1242
*
1243
* NB: The lock MUST be released before the call to the
1244
* output routine.
1245
*/
1246
if (la->la_hold != NULL) {
1247
struct mbuf *chain;
1248
1249
chain = arp_grab_holdchain(la);
1250
LLE_WUNLOCK(la);
1251
arp_flush_holdchain(ifp, la, chain);
1252
} else
1253
LLE_WUNLOCK(la);
1254
}
1255
1256
static void
1257
arp_mark_lle_reachable(struct llentry *la, struct ifnet *ifp)
1258
{
1259
int canceled, wtime;
1260
1261
LLE_WLOCK_ASSERT(la);
1262
1263
la->ln_state = ARP_LLINFO_REACHABLE;
1264
EVENTHANDLER_INVOKE(lle_event, la, LLENTRY_RESOLVED);
1265
1266
if ((ifp->if_flags & IFF_STICKYARP) != 0)
1267
la->la_flags |= LLE_STATIC;
1268
1269
if (!(la->la_flags & LLE_STATIC)) {
1270
LLE_ADDREF(la);
1271
la->la_expire = time_uptime + V_arpt_keep;
1272
wtime = V_arpt_keep - V_arp_maxtries * V_arpt_rexmit;
1273
if (wtime < 0)
1274
wtime = V_arpt_keep;
1275
canceled = callout_reset(&la->lle_timer,
1276
hz * wtime, arptimer, la);
1277
if (canceled)
1278
LLE_REMREF(la);
1279
}
1280
la->la_asked = 0;
1281
la->la_preempt = V_arp_maxtries;
1282
}
1283
1284
/*
1285
* Add permanent link-layer record for given interface address.
1286
*/
1287
static __noinline void
1288
arp_add_ifa_lle(struct ifnet *ifp, const struct sockaddr *dst)
1289
{
1290
struct llentry *lle, *lle_tmp;
1291
1292
/*
1293
* Interface address LLE record is considered static
1294
* because kernel code relies on LLE_STATIC flag to check
1295
* if these entries can be rewriten by arp updates.
1296
*/
1297
lle = lltable_alloc_entry(LLTABLE(ifp), LLE_IFADDR | LLE_STATIC, dst);
1298
if (lle == NULL) {
1299
log(LOG_INFO, "arp_ifinit: cannot create arp "
1300
"entry for interface address\n");
1301
return;
1302
}
1303
1304
LLTABLE_LOCK(LLTABLE(ifp));
1305
LLE_WLOCK(lle);
1306
/* Unlink any entry if exists */
1307
lle_tmp = lla_lookup(LLTABLE(ifp), LLE_EXCLUSIVE, dst);
1308
if (lle_tmp != NULL)
1309
lltable_unlink_entry(LLTABLE(ifp), lle_tmp);
1310
1311
lltable_link_entry(LLTABLE(ifp), lle);
1312
LLTABLE_UNLOCK(LLTABLE(ifp));
1313
1314
if (lle_tmp != NULL)
1315
EVENTHANDLER_INVOKE(lle_event, lle_tmp, LLENTRY_EXPIRED);
1316
1317
EVENTHANDLER_INVOKE(lle_event, lle, LLENTRY_RESOLVED);
1318
LLE_WUNLOCK(lle);
1319
if (lle_tmp != NULL)
1320
lltable_free_entry(LLTABLE(ifp), lle_tmp);
1321
}
1322
1323
/*
1324
* Handle the garp_rexmit_count. Like sysctl_handle_int(), but limits the range
1325
* of valid values.
1326
*/
1327
static int
1328
sysctl_garp_rexmit(SYSCTL_HANDLER_ARGS)
1329
{
1330
int error;
1331
int rexmit_count = *(int *)arg1;
1332
1333
error = sysctl_handle_int(oidp, &rexmit_count, 0, req);
1334
1335
/* Enforce limits on any new value that may have been set. */
1336
if (!error && req->newptr) {
1337
/* A new value was set. */
1338
if (rexmit_count < 0) {
1339
rexmit_count = 0;
1340
} else if (rexmit_count > MAX_GARP_RETRANSMITS) {
1341
rexmit_count = MAX_GARP_RETRANSMITS;
1342
}
1343
*(int *)arg1 = rexmit_count;
1344
}
1345
1346
return (error);
1347
}
1348
1349
/*
1350
* Retransmit a Gratuitous ARP (GARP) and, if necessary, schedule a callout to
1351
* retransmit it again. A pending callout owns a reference to the ifa.
1352
*/
1353
static void
1354
garp_rexmit(void *arg)
1355
{
1356
struct epoch_tracker et;
1357
struct in_ifaddr *ia = arg;
1358
1359
if (callout_pending(&ia->ia_garp_timer) ||
1360
!callout_active(&ia->ia_garp_timer)) {
1361
IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
1362
ifa_free(&ia->ia_ifa);
1363
return;
1364
}
1365
1366
NET_EPOCH_ENTER(et);
1367
CURVNET_SET(ia->ia_ifa.ifa_ifp->if_vnet);
1368
1369
/*
1370
* Drop lock while the ARP request is generated.
1371
*/
1372
IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
1373
1374
arprequest(ia->ia_ifa.ifa_ifp, &IA_SIN(ia)->sin_addr,
1375
&IA_SIN(ia)->sin_addr, IF_LLADDR(ia->ia_ifa.ifa_ifp));
1376
1377
/*
1378
* Increment the count of retransmissions. If the count has reached the
1379
* maximum value, stop sending the GARP packets. Otherwise, schedule
1380
* the callout to retransmit another GARP packet.
1381
*/
1382
++ia->ia_garp_count;
1383
if (ia->ia_garp_count >= V_garp_rexmit_count) {
1384
ifa_free(&ia->ia_ifa);
1385
} else {
1386
int rescheduled;
1387
IF_ADDR_WLOCK(ia->ia_ifa.ifa_ifp);
1388
rescheduled = callout_reset(&ia->ia_garp_timer,
1389
(1 << ia->ia_garp_count) * hz,
1390
garp_rexmit, ia);
1391
IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
1392
if (rescheduled) {
1393
ifa_free(&ia->ia_ifa);
1394
}
1395
}
1396
1397
CURVNET_RESTORE();
1398
NET_EPOCH_EXIT(et);
1399
}
1400
1401
/*
1402
* Start the GARP retransmit timer.
1403
*
1404
* A single GARP is always transmitted when an IPv4 address is added
1405
* to an interface and that is usually sufficient. However, in some
1406
* circumstances, such as when a shared address is passed between
1407
* cluster nodes, this single GARP may occasionally be dropped or
1408
* lost. This can lead to neighbors on the network link working with a
1409
* stale ARP cache and sending packets destined for that address to
1410
* the node that previously owned the address, which may not respond.
1411
*
1412
* To avoid this situation, GARP retransmits can be enabled by setting
1413
* the net.link.ether.inet.garp_rexmit_count sysctl to a value greater
1414
* than zero. The setting represents the maximum number of
1415
* retransmissions. The interval between retransmissions is calculated
1416
* using an exponential backoff algorithm, doubling each time, so the
1417
* retransmission intervals are: {1, 2, 4, 8, 16, ...} (seconds).
1418
*/
1419
static void
1420
garp_timer_start(struct ifaddr *ifa)
1421
{
1422
struct in_ifaddr *ia = (struct in_ifaddr *) ifa;
1423
1424
IF_ADDR_WLOCK(ia->ia_ifa.ifa_ifp);
1425
ia->ia_garp_count = 0;
1426
if (callout_reset(&ia->ia_garp_timer, (1 << ia->ia_garp_count) * hz,
1427
garp_rexmit, ia) == 0) {
1428
ifa_ref(ifa);
1429
}
1430
IF_ADDR_WUNLOCK(ia->ia_ifa.ifa_ifp);
1431
}
1432
1433
void
1434
arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
1435
{
1436
struct epoch_tracker et;
1437
const struct sockaddr_in *dst_in;
1438
const struct sockaddr *dst;
1439
1440
if (ifa->ifa_carp != NULL)
1441
return;
1442
1443
dst = ifa->ifa_addr;
1444
dst_in = (const struct sockaddr_in *)dst;
1445
1446
if (ntohl(dst_in->sin_addr.s_addr) == INADDR_ANY)
1447
return;
1448
NET_EPOCH_ENTER(et);
1449
arp_announce_ifaddr(ifp, dst_in->sin_addr, IF_LLADDR(ifp));
1450
NET_EPOCH_EXIT(et);
1451
if (V_garp_rexmit_count > 0) {
1452
garp_timer_start(ifa);
1453
}
1454
1455
arp_add_ifa_lle(ifp, dst);
1456
}
1457
1458
void
1459
arp_announce_ifaddr(struct ifnet *ifp, struct in_addr addr, u_char *enaddr)
1460
{
1461
1462
if (ntohl(addr.s_addr) != INADDR_ANY)
1463
arprequest(ifp, &addr, &addr, enaddr);
1464
}
1465
1466
/*
1467
* Sends gratuitous ARPs for each ifaddr to notify other
1468
* nodes about the address change.
1469
*/
1470
static __noinline void
1471
arp_handle_ifllchange(struct ifnet *ifp)
1472
{
1473
struct ifaddr *ifa;
1474
1475
CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1476
if (ifa->ifa_addr->sa_family == AF_INET)
1477
arp_ifinit(ifp, ifa);
1478
}
1479
}
1480
1481
/*
1482
* A handler for interface link layer address change event.
1483
*/
1484
static void
1485
arp_iflladdr(void *arg __unused, struct ifnet *ifp)
1486
{
1487
/* if_bridge can update its lladdr during if_vmove(), after we've done
1488
* with in_ifdetach(). XXXGL: needs to be fixed. */
1489
if (ifp->if_inet == NULL)
1490
return;
1491
1492
lltable_update_ifaddr(LLTABLE(ifp));
1493
1494
if ((ifp->if_flags & IFF_UP) != 0)
1495
arp_handle_ifllchange(ifp);
1496
}
1497
1498
static void
1499
vnet_arp_init(void)
1500
{
1501
1502
if (IS_DEFAULT_VNET(curvnet)) {
1503
netisr_register(&arp_nh);
1504
iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
1505
arp_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
1506
}
1507
#ifdef VIMAGE
1508
else
1509
netisr_register_vnet(&arp_nh);
1510
#endif
1511
}
1512
VNET_SYSINIT(vnet_arp_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_SECOND,
1513
vnet_arp_init, NULL);
1514
1515
#ifdef VIMAGE
1516
/*
1517
* We have to unregister ARP along with IP otherwise we risk doing INADDR_HASH
1518
* lookups after destroying the hash. Ideally this would go on SI_ORDER_3.5.
1519
*/
1520
static void
1521
vnet_arp_destroy(__unused void *arg)
1522
{
1523
1524
netisr_unregister_vnet(&arp_nh);
1525
}
1526
VNET_SYSUNINIT(vnet_arp_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD,
1527
vnet_arp_destroy, NULL);
1528
#endif
1529
1530