Skip to content

Commit e08d0b3

Browse files
edumazet authored and davem330 committed
inet: implement lockless IP_TOS
Some reads of inet->tos are racy.

Add needed READ_ONCE() annotations and convert IP_TOS option lockless.

v2: missing changes in include/net/route.h (David Ahern)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
1 parent ceaa714 commit e08d0b3

File tree

10 files changed

+31
-36
lines changed

10 files changed

+31
-36
lines changed

include/net/ip.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ static inline u8 ip_sendmsg_scope(const struct inet_sock *inet,
258258

259259
static inline __u8 get_rttos(struct ipcm_cookie* ipc, struct inet_sock *inet)
260260
{
261-
return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(inet->tos);
261+
return (ipc->tos != -1) ? RT_TOS(ipc->tos) : RT_TOS(READ_ONCE(inet->tos));
262262
}
263263

264264
/* datagram.c */
@@ -810,6 +810,5 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val);
810810
void ip_sock_set_pktinfo(struct sock *sk);
811811
void ip_sock_set_recverr(struct sock *sk);
812812
void ip_sock_set_tos(struct sock *sk, int val);
813-
void __ip_sock_set_tos(struct sock *sk, int val);
814813

815814
#endif /* _IP_H */

include/net/route.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737

3838
#define RTO_ONLINK 0x01
3939

40-
#define RT_CONN_FLAGS(sk) (RT_TOS(inet_sk(sk)->tos) | sock_flag(sk, SOCK_LOCALROUTE))
40+
#define RT_CONN_FLAGS(sk) (RT_TOS(READ_ONCE(inet_sk(sk)->tos)) | sock_flag(sk, SOCK_LOCALROUTE))
4141
#define RT_CONN_FLAGS_TOS(sk,tos) (RT_TOS(tos) | sock_flag(sk, SOCK_LOCALROUTE))
4242

4343
static inline __u8 ip_sock_rt_scope(const struct sock *sk)
@@ -50,7 +50,7 @@ static inline __u8 ip_sock_rt_scope(const struct sock *sk)
5050

5151
static inline __u8 ip_sock_rt_tos(const struct sock *sk)
5252
{
53-
return RT_TOS(inet_sk(sk)->tos);
53+
return RT_TOS(READ_ONCE(inet_sk(sk)->tos));
5454
}
5555

5656
struct ip_tunnel_info;

net/dccp/ipv4.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -511,7 +511,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
511511
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
512512
ireq->ir_rmt_addr,
513513
rcu_dereference(ireq->ireq_opt),
514-
inet_sk(sk)->tos);
514+
READ_ONCE(inet_sk(sk)->tos));
515515
rcu_read_unlock();
516516
err = net_xmit_eval(err);
517517
}

net/ipv4/inet_diag.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
134134
* hence this needs to be included regardless of socket family.
135135
*/
136136
if (ext & (1 << (INET_DIAG_TOS - 1)))
137-
if (nla_put_u8(skb, INET_DIAG_TOS, inet->tos) < 0)
137+
if (nla_put_u8(skb, INET_DIAG_TOS, READ_ONCE(inet->tos)) < 0)
138138
goto errout;
139139

140140
#if IS_ENABLED(CONFIG_IPV6)

net/ipv4/ip_output.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,7 @@ EXPORT_SYMBOL(__ip_queue_xmit);
544544

545545
int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
546546
{
547-
return __ip_queue_xmit(sk, skb, fl, inet_sk(sk)->tos);
547+
return __ip_queue_xmit(sk, skb, fl, READ_ONCE(inet_sk(sk)->tos));
548548
}
549549
EXPORT_SYMBOL(ip_queue_xmit);
550550

@@ -1438,7 +1438,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
14381438
iph = ip_hdr(skb);
14391439
iph->version = 4;
14401440
iph->ihl = 5;
1441-
iph->tos = (cork->tos != -1) ? cork->tos : inet->tos;
1441+
iph->tos = (cork->tos != -1) ? cork->tos : READ_ONCE(inet->tos);
14421442
iph->frag_off = df;
14431443
iph->ttl = ttl;
14441444
iph->protocol = sk->sk_protocol;

net/ipv4/ip_sockglue.c

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -585,25 +585,20 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
585585
return err;
586586
}
587587

588-
void __ip_sock_set_tos(struct sock *sk, int val)
588+
void ip_sock_set_tos(struct sock *sk, int val)
589589
{
590+
u8 old_tos = READ_ONCE(inet_sk(sk)->tos);
591+
590592
if (sk->sk_type == SOCK_STREAM) {
591593
val &= ~INET_ECN_MASK;
592-
val |= inet_sk(sk)->tos & INET_ECN_MASK;
594+
val |= old_tos & INET_ECN_MASK;
593595
}
594-
if (inet_sk(sk)->tos != val) {
595-
inet_sk(sk)->tos = val;
596+
if (old_tos != val) {
597+
WRITE_ONCE(inet_sk(sk)->tos, val);
596598
WRITE_ONCE(sk->sk_priority, rt_tos2priority(val));
597599
sk_dst_reset(sk);
598600
}
599601
}
600-
601-
void ip_sock_set_tos(struct sock *sk, int val)
602-
{
603-
lock_sock(sk);
604-
__ip_sock_set_tos(sk, val);
605-
release_sock(sk);
606-
}
607602
EXPORT_SYMBOL(ip_sock_set_tos);
608603

609604
void ip_sock_set_freebind(struct sock *sk)
@@ -1050,6 +1045,9 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
10501045
return 0;
10511046
case IP_MTU_DISCOVER:
10521047
return ip_sock_set_mtu_discover(sk, val);
1048+
case IP_TOS: /* This sets both TOS and Precedence */
1049+
ip_sock_set_tos(sk, val);
1050+
return 0;
10531051
}
10541052

10551053
err = 0;
@@ -1104,9 +1102,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
11041102
}
11051103
}
11061104
break;
1107-
case IP_TOS: /* This sets both TOS and Precedence */
1108-
__ip_sock_set_tos(sk, val);
1109-
break;
11101105
case IP_UNICAST_IF:
11111106
{
11121107
struct net_device *dev = NULL;
@@ -1593,6 +1588,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
15931588
case IP_MTU_DISCOVER:
15941589
val = READ_ONCE(inet->pmtudisc);
15951590
goto copyval;
1591+
case IP_TOS:
1592+
val = READ_ONCE(inet->tos);
1593+
goto copyval;
15961594
}
15971595

15981596
if (needs_rtnl)
@@ -1629,9 +1627,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
16291627
return -EFAULT;
16301628
return 0;
16311629
}
1632-
case IP_TOS:
1633-
val = inet->tos;
1634-
break;
16351630
case IP_MTU:
16361631
{
16371632
struct dst_entry *dst;

net/ipv4/tcp_ipv4.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,10 +1024,11 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
10241024
if (skb) {
10251025
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
10261026

1027-
tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
1028-
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
1029-
(inet_sk(sk)->tos & INET_ECN_MASK) :
1030-
inet_sk(sk)->tos;
1027+
tos = READ_ONCE(inet_sk(sk)->tos);
1028+
1029+
if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
1030+
tos = (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
1031+
(tos & INET_ECN_MASK);
10311032

10321033
if (!INET_ECN_is_capable(tos) &&
10331034
tcp_bpf_ca_needs_ecn((struct sock *)req))

net/mptcp/sockopt.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -734,11 +734,11 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname,
734734

735735
lock_sock(sk);
736736
sockopt_seq_inc(msk);
737-
val = inet_sk(sk)->tos;
737+
val = READ_ONCE(inet_sk(sk)->tos);
738738
mptcp_for_each_subflow(msk, subflow) {
739739
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
740740

741-
__ip_sock_set_tos(ssk, val);
741+
ip_sock_set_tos(ssk, val);
742742
}
743743
release_sock(sk);
744744

@@ -1343,7 +1343,7 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
13431343

13441344
switch (optname) {
13451345
case IP_TOS:
1346-
return mptcp_put_int_option(msk, optval, optlen, inet_sk(sk)->tos);
1346+
return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos));
13471347
}
13481348

13491349
return -EOPNOTSUPP;
@@ -1411,7 +1411,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
14111411
ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
14121412
ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
14131413
ssk->sk_ipv6only = sk->sk_ipv6only;
1414-
__ip_sock_set_tos(ssk, inet_sk(sk)->tos);
1414+
ip_sock_set_tos(ssk, inet_sk(sk)->tos);
14151415

14161416
if (sk->sk_userlocks & tx_rx_locks) {
14171417
ssk->sk_userlocks |= sk->sk_userlocks & tx_rx_locks;

net/sctp/protocol.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
426426
struct dst_entry *dst = NULL;
427427
union sctp_addr *daddr = &t->ipaddr;
428428
union sctp_addr dst_saddr;
429-
__u8 tos = inet_sk(sk)->tos;
429+
u8 tos = READ_ONCE(inet_sk(sk)->tos);
430430

431431
if (t->dscp & SCTP_DSCP_SET_MASK)
432432
tos = t->dscp & SCTP_DSCP_VAL_MASK;
@@ -1057,7 +1057,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t)
10571057
struct flowi4 *fl4 = &t->fl.u.ip4;
10581058
struct sock *sk = skb->sk;
10591059
struct inet_sock *inet = inet_sk(sk);
1060-
__u8 dscp = inet->tos;
1060+
__u8 dscp = READ_ONCE(inet->tos);
10611061
__be16 df = 0;
10621062

10631063
pr_debug("%s: skb:%p, len:%d, src:%pI4, dst:%pI4\n", __func__, skb,

tools/testing/selftests/net/mptcp/mptcp_connect.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -716,7 +716,7 @@ run_test_transparent()
716716
# the required infrastructure in MPTCP sockopt code. To support TOS, the
717717
# following function has been exported (T). Not great but better than
718718
# checking for a specific kernel version.
719-
if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then
719+
if ! mptcp_lib_kallsyms_has "T ip_sock_set_tos$"; then
720720
echo "INFO: ${msg} not supported by the kernel: SKIP"
721721
mptcp_lib_result_skip "${TEST_GROUP}"
722722
return

0 commit comments

Comments
 (0)