patch-2.1.99 linux/net/ipv4/ip_output.c
Next file: linux/net/ipv4/ipmr.c
Previous file: linux/net/ipv4/icmp.c
Back to the patch index
Back to the overall index
- Lines: 558
- Date:
Tue Apr 28 11:10:11 1998
- Orig file:
v2.1.98/linux/net/ipv4/ip_output.c
- Orig date:
Wed Apr 1 20:11:55 1998
diff -u --recursive --new-file v2.1.98/linux/net/ipv4/ip_output.c linux/net/ipv4/ip_output.c
@@ -5,7 +5,7 @@
*
* The Internet Protocol (IP) output module.
*
- * Version: $Id: ip_output.c,v 1.51 1998/03/28 00:55:34 davem Exp $
+ * Version: $Id: ip_output.c,v 1.56 1998/04/17 02:36:46 davem Exp $
*
* Authors: Ross Biro, <bir7@leland.Stanford.Edu>
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
@@ -31,6 +31,10 @@
* Andi Kleen: Fix broken PMTU recovery and remove
* some redundant tests.
* Vitaly E. Lavrov : Transparent proxy revived after year coma.
+ * Andi Kleen : Replace ip_reply with ip_send_reply.
+ * Andi Kleen : Split fast and slow ip_build_xmit path
+ * for decreased register pressure on x86
+ * and more readibility.
*/
#include <asm/uaccess.h>
@@ -70,7 +74,6 @@
#include <linux/firewall.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
-#include <linux/ipsec.h>
/*
* Shall we try to damage output packets if routing dev changes?
@@ -88,6 +91,9 @@
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
+/*
+ * Add an ip header to a skbuff and send it out.
+ */
void ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
u32 saddr, u32 daddr, struct ip_options *opt)
{
@@ -303,16 +309,6 @@
if (call_out_firewall(PF_INET, dev, iph, NULL, &skb) < FW_ACCEPT)
goto drop;
-#ifdef CONFIG_NET_SECURITY
- /* Add an IP checksum (must do this before SECurity because
- * of possible tunneling).
- */
- ip_send_check(iph);
- if (call_out_firewall(PF_SECURITY, NULL, NULL, (void *) 4, &skb) < FW_ACCEPT)
- goto drop;
- iph = skb->nh.iph;
- /* Don't update tot_len, as the dev->mtu is already decreased. */
-#endif
/* This can happen when the transport layer has segments queued
* with a cached route, and by the time we get here things are
* re-routed to a device with a different MTU than the original
@@ -335,10 +331,9 @@
if (tot_len > rt->u.dst.pmtu)
goto fragment;
-#ifndef CONFIG_NET_SECURITY
/* Add an IP checksum. */
ip_send_check(iph);
-#endif
+
skb->priority = sk->priority;
skb->dst->output(skb);
return;
@@ -382,7 +377,7 @@
* length to be copied.
*/
-int ip_build_xmit(struct sock *sk,
+int ip_build_xmit_slow(struct sock *sk,
int getfrag (const void *,
char *,
unsigned int,
@@ -397,91 +392,16 @@
int err;
int offset, mf;
unsigned short id;
- struct iphdr *iph;
+
int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
int nfrags=0;
struct ip_options *opt = ipc->opt;
int df = htons(IP_DF);
-#ifdef CONFIG_NET_SECURITY
- int fw_res;
-#endif
if (sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
- (rt->u.dst.mxlock&(1<<RTAX_MTU)))
+ (rt->u.dst.mxlock&(1<<RTAX_MTU)))
df = 0;
-
-
- /*
- * Try the simple case first. This leaves fragmented frames, and by
- * choice RAW frames within 20 bytes of maximum size(rare) to the long path
- */
-
- if (!sk->ip_hdrincl)
- length += sizeof(struct iphdr);
-
- if (length <= rt->u.dst.pmtu && opt == NULL) {
- int error;
- struct sk_buff *skb=sock_alloc_send_skb(sk, length+hh_len+15,
- 0, flags&MSG_DONTWAIT, &error);
- if(skb==NULL) {
- ip_statistics.IpOutDiscards++;
- return error;
- }
-
- skb->priority = sk->priority;
- skb->dst = dst_clone(&rt->u.dst);
- skb_reserve(skb, hh_len);
-
- skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
-
- dev_lock_list();
-
- if(!sk->ip_hdrincl) {
- iph->version=4;
- iph->ihl=5;
- iph->tos=sk->ip_tos;
- iph->tot_len = htons(length);
- iph->id=htons(ip_id_count++);
- iph->frag_off = df;
- iph->ttl=sk->ip_mc_ttl;
- if (rt->rt_type != RTN_MULTICAST)
- iph->ttl=sk->ip_ttl;
- iph->protocol=sk->protocol;
- iph->saddr=rt->rt_src;
- iph->daddr=rt->rt_dst;
- iph->check=0;
- iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
- err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
- }
- else
- err = getfrag(frag, (void *)iph, 0, length);
- dev_unlock_list();
-
- if (err)
- err = -EFAULT;
-
- if(!err && call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb) < FW_ACCEPT)
- err = -EPERM;
-#ifdef CONFIG_NET_SECURITY
- if ((fw_res=call_out_firewall(PF_SECURITY, NULL, NULL, (void *) 5, &skb))<FW_ACCEPT)
- {
- kfree_skb(skb);
- if (fw_res != FW_QUEUE)
- return -EPERM;
- else
- return 0;
- }
-#endif
-
- if (err)
- {
- kfree_skb(skb);
- return err;
- }
-
- return rt->u.dst.output(skb);
- }
-
+
if (!sk->ip_hdrincl)
length -= sizeof(struct iphdr);
@@ -497,7 +417,7 @@
*/
maxfraglen = ((rt->u.dst.pmtu-sizeof(struct iphdr)) & ~7) + fragheaderlen;
- }
+ }
if (length + fragheaderlen > 0xFFFF)
return -EMSGSIZE;
@@ -551,9 +471,9 @@
*/
do {
- struct sk_buff * skb;
int error;
char *data;
+ struct sk_buff * skb;
/*
* Get the memory we require with some space left for alignment.
@@ -581,13 +501,15 @@
*/
data = skb_put(skb, fraglen);
- skb->nh.iph = iph = (struct iphdr *)data;
+ skb->nh.iph = (struct iphdr *)data;
/*
* Only write IP header onto non-raw packets
*/
if(!sk->ip_hdrincl) {
+ struct iphdr *iph = (struct iphdr *)data;
+
iph->version = 4;
iph->ihl = 5;
if (opt) {
@@ -622,49 +544,148 @@
* User data callback
*/
- err = getfrag(frag, data, offset, fraglen-fragheaderlen);
- if (err)
+ err = 0;
+ if (getfrag(frag, data, offset, fraglen-fragheaderlen))
err = -EFAULT;
/*
* Account for the fragment.
*/
-
- if(!err && !offset && call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb) < FW_ACCEPT)
+
+ if(!err && offset == 0 &&
+ call_out_firewall(PF_INET, rt->u.dst.dev, skb->nh.iph, NULL, &skb) < FW_ACCEPT)
err = -EPERM;
-#ifdef CONFIG_NET_SECURITY
- if ((fw_res=call_out_firewall(PF_SECURITY, NULL, NULL, (void *) 6, &skb))<FW_ACCEPT)
- {
- if (fw_res != FW_QUEUE)
- err= -EPERM;
- }
-#endif
- if (err)
- {
+
+ if (err) {
+ ip_statistics.IpOutDiscards++;
kfree_skb(skb);
dev_unlock_list();
- return err;
- }
+ return err;
+ }
+
+
offset -= (maxfraglen-fragheaderlen);
fraglen = maxfraglen;
-
nfrags++;
-
+
+ err = 0;
if (rt->u.dst.output(skb)) {
- if (nfrags>1)
- ip_statistics.IpFragCreates += nfrags;
- dev_unlock_list();
- return -ENETDOWN;
+ err = -ENETDOWN;
+ ip_statistics.IpOutDiscards++;
+ break;
}
} while (offset >= 0);
if (nfrags>1)
ip_statistics.IpFragCreates += nfrags;
+ dev_unlock_list();
+ return err;
+}
+
+
+/*
+ * Fast path for unfragmented packets.
+ */
+int ip_build_xmit(struct sock *sk,
+ int getfrag (const void *,
+ char *,
+ unsigned int,
+ unsigned int),
+ const void *frag,
+ unsigned length,
+ struct ipcm_cookie *ipc,
+ struct rtable *rt,
+ int flags)
+{
+ int err;
+ struct sk_buff *skb;
+ int df;
+ struct iphdr *iph;
+
+ /*
+ * Try the simple case first. This leaves fragmented frames, and by
+ * choice RAW frames within 20 bytes of maximum size(rare) to the long path
+ */
+
+ if (!sk->ip_hdrincl)
+ length += sizeof(struct iphdr);
+
+ /*
+ * Check for slow path.
+ */
+ if (length > rt->u.dst.pmtu || ipc->opt != NULL)
+ return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags);
+
+ /*
+ * Do path mtu discovery if needed.
+ */
+ df = htons(IP_DF);
+ if (sk->ip_pmtudisc == IP_PMTUDISC_DONT ||
+ (rt->u.dst.mxlock&(1<<RTAX_MTU)))
+ df = 0;
+
+ /*
+ * Fast path for unfragmented frames without options.
+ */
+ {
+ int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
+
+ skb = sock_alloc_send_skb(sk, length+hh_len+15,
+ 0, flags&MSG_DONTWAIT, &err);
+ if(skb==NULL)
+ goto error;
+ skb_reserve(skb, hh_len);
+ }
+
+ skb->priority = sk->priority;
+ skb->dst = dst_clone(&rt->u.dst);
+
+ skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
+
+ dev_lock_list();
+
+ if(!sk->ip_hdrincl) {
+ iph->version=4;
+ iph->ihl=5;
+ iph->tos=sk->ip_tos;
+ iph->tot_len = htons(length);
+ iph->id=htons(ip_id_count++);
+ iph->frag_off = df;
+ iph->ttl=sk->ip_mc_ttl;
+ if (rt->rt_type != RTN_MULTICAST)
+ iph->ttl=sk->ip_ttl;
+ iph->protocol=sk->protocol;
+ iph->saddr=rt->rt_src;
+ iph->daddr=rt->rt_dst;
+ iph->check=0;
+ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+ err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4);
+ }
+ else
+ err = getfrag(frag, (void *)iph, 0, length);
dev_unlock_list();
- return 0;
+
+ if (err)
+ err = -EFAULT;
+
+ if(!err && call_out_firewall(PF_INET, rt->u.dst.dev, iph, NULL, &skb) < FW_ACCEPT)
+ err = -EPERM;
+
+ if (err) {
+ kfree_skb(skb);
+ goto error;
+ }
+
+ return rt->u.dst.output(skb);
+
+error:
+ ip_statistics.IpOutDiscards++;
+ return err;
}
+
+
/*
* This IP datagram is too large to be sent in one piece. Break it up into
@@ -682,7 +703,7 @@
unsigned char *ptr;
struct device *dev;
struct sk_buff *skb2;
- int left, mtu, hlen, len;
+ unsigned int mtu, hlen, left, len;
int offset;
int not_last_frag;
u16 dont_fragment;
@@ -712,11 +733,8 @@
* in this case we were fortunate it didn't happen
*/
- if (mtu<8) {
- ip_statistics.IpFragFails++;
- kfree_skb(skb);
- return;
- }
+ if (mtu<8)
+ goto fail;
/*
* Fragment the datagram.
@@ -745,8 +763,7 @@
/* IF: we are not sending upto and including the packet end
then align the next start on an eight byte boundary */
if (len < left) {
- len/=8;
- len*=8;
+ len &= ~7;
}
/*
* Allocate buffer.
@@ -754,9 +771,7 @@
if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
- ip_statistics.IpFragFails++;
- kfree_skb(skb);
- return;
+ goto fail;
}
/*
@@ -829,61 +844,96 @@
}
kfree_skb(skb);
ip_statistics.IpFragOKs++;
+ return;
+
+fail:
+ kfree_skb(skb);
+ ip_statistics.IpFragFails++;
}
-struct sk_buff * ip_reply(struct sk_buff *skb, int payload)
+/*
+ * Fetch data from kernel space and fill in checksum if needed.
+ */
+static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset,
+ unsigned int fraglen)
+{
+ struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr;
+ u16 *pktp = (u16 *)to;
+ struct iovec *iov;
+ int len;
+ int hdrflag = 1;
+
+#if 0
+ printk("ip_reply_glue_bits: offset=%u,flen=%u iov[0].l=%u,iov[1].len=%u\n",
+ offset,fraglen,dp->iov[0].iov_len,dp->iov[1].iov_len);
+#endif
+
+ iov = &dp->iov[0];
+ if (offset >= iov->iov_len) {
+ offset -= iov->iov_len;
+ iov++;
+ hdrflag = 0;
+ }
+ len = iov->iov_len - offset;
+ if (fraglen > len) { /* overlapping. */
+#if 1
+ if (iov > &dp->iov[0]) {
+ printk("frag too long! (o=%u,fl=%u)\n",offset,fraglen);
+ return -1;
+ }
+#endif
+ dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len,
+ dp->csum);
+ offset = 0;
+ fraglen -= len;
+ to += len;
+ iov++;
+ }
+
+ dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen,
+ dp->csum);
+
+ if (hdrflag && dp->csumoffset)
+ *(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */
+ return 0;
+}
+
+/*
+ * Generic function to send a packet as reply to another packet.
+ * Used to send TCP resets so far. ICMP should use this function too.
+ *
+ * Should run single threaded per socket because it uses the sock
+ * structure to pass arguments.
+ */
+void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
+ unsigned int len)
{
struct {
struct ip_options opt;
char data[40];
} replyopts;
-
- struct rtable *rt = (struct rtable*)skb->dst;
- struct sk_buff *reply;
- int iphlen;
- struct iphdr *iph;
-
struct ipcm_cookie ipc;
u32 daddr;
-
+ struct rtable *rt = (struct rtable*)skb->dst;
+
if (ip_options_echo(&replyopts.opt, skb))
- return NULL;
+ return;
+
+ sk->ip_tos = skb->nh.iph->tos;
+ sk->priority = skb->priority;
+ sk->protocol = skb->nh.iph->protocol;
daddr = ipc.addr = rt->rt_src;
ipc.opt = &replyopts.opt;
+
if (ipc.opt->srr)
daddr = replyopts.opt.faddr;
-
if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
- return NULL;
-
- iphlen = sizeof(struct iphdr) + replyopts.opt.optlen;
- reply = alloc_skb(rt->u.dst.dev->hard_header_len+15+iphlen+payload, GFP_ATOMIC);
- if (reply == NULL) {
- ip_rt_put(rt);
- return NULL;
- }
-
- reply->priority = skb->priority;
- reply->dst = &rt->u.dst;
- skb_reserve(reply, (rt->u.dst.dev->hard_header_len+15)&~15);
-
- /* Now build the IP header. */
- reply->nh.iph = iph = (struct iphdr *)skb_put(reply, iphlen);
-
- iph->version = 4;
- iph->ihl = iphlen>>2;
- iph->tos = skb->nh.iph->tos;
- iph->frag_off = 0;
- iph->ttl = MAXTTL;
- iph->daddr = rt->rt_dst;
- iph->saddr = rt->rt_src;
- iph->protocol = skb->nh.iph->protocol;
- iph->id = htons(ip_id_count++);
-
- ip_options_build(reply, &replyopts.opt, daddr, rt, 0);
+ return;
- return reply;
+ /* And let IP do all the hard work. */
+ ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT);
+ ip_rt_put(rt);
}
/*
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov