skb->sk = sk;
skb->destructor = sctp_sock_rfree;
atomic_add(event->rmem_len, &sk->sk_rmem_alloc);
+ /*
+ * This mimics the behavior of
+ * sk_stream_set_owner_r
+ */
+ sk->sk_forward_alloc -= event->rmem_len;
}
/* Tests if the list has one and only one entry. */
/* Use SCTP specific send buffer space queues. */
ep->sndbuf_policy = sctp_sndbuf_policy;
+
sk->sk_write_space = sctp_write_space;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/seq_file.h>
+#include <linux/bootmem.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/ipv6.h>
struct kmem_cache *sctp_chunk_cachep __read_mostly;
struct kmem_cache *sctp_bucket_cachep __read_mostly;
+extern int sysctl_sctp_mem[3];
+extern int sysctl_sctp_rmem[3];
+extern int sysctl_sctp_wmem[3];
+
/* Return the address of the control sock. */
struct sock *sctp_get_ctl_sock(void)
{
int i;
int status = -EINVAL;
unsigned long goal;
+ unsigned long limit;
+ int max_share;
int order;
/* SCTP_DEBUG sanity check. */
/* Initialize handle used for association ids. */
idr_init(&sctp_assocs_id);
+ /* Set the pressure threshold to be a fraction of global memory that
+ * is up to 1/2 at 256 MB, decreasing toward zero with the amount of
+ * memory, with a floor of 128 pages.
+ * Note this initializes the data in sctpv6_prot too
+ * Unabashedly stolen from tcp_init
+ */
+ limit = min(num_physpages, 1UL<<(28-PAGE_SHIFT)) >> (20-PAGE_SHIFT);
+ limit = (limit * (num_physpages >> (20-PAGE_SHIFT))) >> (PAGE_SHIFT-11);
+ limit = max(limit, 128UL);
+ sysctl_sctp_mem[0] = limit / 4 * 3;
+ sysctl_sctp_mem[1] = limit;
+ sysctl_sctp_mem[2] = sysctl_sctp_mem[0] * 2;
+
+ /* Set per-socket limits to no more than 1/128 the pressure threshold */
+ limit = (sysctl_sctp_mem[1]) << (PAGE_SHIFT - 7);
+ max_share = min(4UL*1024*1024, limit);
+
+ sysctl_sctp_rmem[0] = PAGE_SIZE; /* give each asoc 1 page min */
+ sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1));
+ sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share);
+
+ sysctl_sctp_wmem[0] = SK_STREAM_MEM_QUANTUM;
+ sysctl_sctp_wmem[1] = 16*1024;
+ sysctl_sctp_wmem[2] = max(64*1024, max_share);
+
/* Size and allocate the association hash table.
* The methodology is similar to that of the tcp hash tables.
*/
sctp_verb_t deliver;
int tmp;
__u32 tsn;
- int account_value;
struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map;
struct sock *sk = asoc->base.sk;
- int rcvbuf_over = 0;
data_hdr = chunk->subh.data_hdr = (sctp_datahdr_t *)chunk->skb->data;
skb_pull(chunk->skb, sizeof(sctp_datahdr_t));
/* ASSERT: Now skb->data is really the user data. */
- /*
- * If we are established, and we have used up our receive buffer
- * memory, think about droping the frame.
- * Note that we have an opportunity to improve performance here.
- * If we accept one chunk from an skbuff, we have to keep all the
- * memory of that skbuff around until the chunk is read into user
- * space. Therefore, once we accept 1 chunk we may as well accept all
- * remaining chunks in the skbuff. The data_accepted flag helps us do
- * that.
- */
- if ((asoc->state == SCTP_STATE_ESTABLISHED) && (!chunk->data_accepted)) {
- /*
- * If the receive buffer policy is 1, then each
- * association can allocate up to sk_rcvbuf bytes
- * otherwise, all the associations in aggregate
- * may allocate up to sk_rcvbuf bytes
- */
- if (asoc->ep->rcvbuf_policy)
- account_value = atomic_read(&asoc->rmem_alloc);
- else
- account_value = atomic_read(&sk->sk_rmem_alloc);
- if (account_value > sk->sk_rcvbuf) {
- /*
- * We need to make forward progress, even when we are
- * under memory pressure, so we always allow the
- * next tsn after the ctsn ack point to be accepted.
- * This lets us avoid deadlocks in which we have to
- * drop frames that would otherwise let us drain the
- * receive queue.
- */
- if ((sctp_tsnmap_get_ctsn(map) + 1) != tsn)
- return SCTP_IERROR_IGNORE_TSN;
-
- /*
- * We're going to accept the frame but we should renege
- * to make space for it. This will send us down that
- * path later in this function.
- */
- rcvbuf_over = 1;
- }
- }
-
/* Process ECN based congestion.
*
* Since the chunk structure is reused for all chunks within
* seems a bit troublesome in that frag_point varies based on
* PMTU. In cases, such as loopback, this might be a rather
* large spill over.
- * NOTE: If we have a full receive buffer here, we only renege if
- * our receiver can still make progress without the tsn being
- * received. We do this because in the event that the associations
- * receive queue is empty we are filling a leading gap, and since
- * reneging moves the gap to the end of the tsn stream, we are likely
- * to stall again very shortly. Avoiding the renege when we fill a
- * leading gap is a good heuristic for avoiding such steady state
- * stalls.
- */
- if (!asoc->rwnd || asoc->rwnd_over ||
- (datalen > asoc->rwnd + asoc->frag_point) ||
- (rcvbuf_over && (!skb_queue_len(&sk->sk_receive_queue)))) {
+ */
+ if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over ||
+ (datalen > asoc->rwnd + asoc->frag_point))) {
/* If this is the next TSN, consider reneging to make
* room. Note: Playing nice with a confused sender. A
}
}
+ /*
+ * Also try to renege to limit our memory usage in the event that
+ * we are under memory pressure.
+ * If we can't renege, don't worry about it: the sk_stream_rmem_schedule
+ * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our
+ * memory usage too much.
+ */
+ if (*sk->sk_prot_creator->memory_pressure) {
+ if (sctp_tsnmap_has_gap(map) &&
+ (sctp_tsnmap_get_ctsn(map) + 1) == tsn) {
+ SCTP_DEBUG_PRINTK("Under Pressure! Reneging for tsn:%u\n", tsn);
+ deliver = SCTP_CMD_RENEGE;
+ }
+ }
+
/*
* Section 3.3.10.9 No User Data (9)
*
struct sctp_association *, sctp_socket_type_t);
static char *sctp_hmac_alg = SCTP_COOKIE_HMAC_ALG;
+extern struct kmem_cache *sctp_bucket_cachep;
+extern int sysctl_sctp_mem[3];
+extern int sysctl_sctp_rmem[3];
+extern int sysctl_sctp_wmem[3];
+
+int sctp_memory_pressure;
+atomic_t sctp_memory_allocated;
+atomic_t sctp_sockets_allocated;
+
+static void sctp_enter_memory_pressure(void)
+{ /* Installed as the .enter_memory_pressure hook of the SCTP proto structs. */
+ sctp_memory_pressure = 1; /* Flag whose address is published as .memory_pressure. */
+} /* NOTE(review): nothing visible here resets the flag to 0 - confirm who clears it. */
+
+
/* Get the sndbuf space available at the time on the association. */
static inline int sctp_wspace(struct sctp_association *asoc)
{
- struct sock *sk = asoc->base.sk;
- int amt = 0;
+ int amt; /* holds the bytes in use first, then the space left that is returned */
- if (asoc->ep->sndbuf_policy) {
- /* make sure that no association uses more than sk_sndbuf */
- amt = sk->sk_sndbuf - asoc->sndbuf_used;
+ if (asoc->ep->sndbuf_policy)
+ amt = asoc->sndbuf_used; /* policy set: count only this association's usage */
+ else
+ amt = atomic_read(&asoc->base.sk->sk_wmem_alloc); /* otherwise count the whole socket */
+
+ if (amt >= asoc->base.sk->sk_sndbuf) { /* usage has reached the send buffer size */
+ if (asoc->base.sk->sk_userlocks & SOCK_SNDBUF_LOCK)
+ amt = 0; /* SOCK_SNDBUF_LOCK is set in sk_userlocks: report no space */
+ else {
+ amt = sk_stream_wspace(asoc->base.sk); /* fall back to the stream-level figure */
+ if (amt < 0)
+ amt = 0; /* never report negative space */
+ }
} else {
- /* do socket level accounting */
- amt = sk->sk_sndbuf - atomic_read(&sk->sk_wmem_alloc);
+ amt = asoc->base.sk->sk_sndbuf - amt; /* room left below sk_sndbuf */
}
-
- if (amt < 0)
- amt = 0;
-
return amt;
}
sizeof(struct sctp_chunk);
atomic_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
+ sk_charge_skb(sk, chunk->skb);
}
/* Verify that this is a valid address. */
sp->hmac = NULL;
SCTP_DBG_OBJCNT_INC(sock);
+ atomic_inc(&sctp_sockets_allocated);
return 0;
}
/* Release our hold on the endpoint. */
ep = sctp_sk(sk)->ep;
sctp_endpoint_free(ep);
-
+ atomic_dec(&sctp_sockets_allocated);
return 0;
}
atomic_sub(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
+ /*
+ * This undoes what is done via sk_charge_skb
+ */
+ sk->sk_wmem_queued -= skb->truesize;
+ sk->sk_forward_alloc += skb->truesize;
+
sock_wfree(skb);
__sctp_write_space(asoc);
struct sctp_ulpevent *event = sctp_skb2event(skb);
atomic_sub(event->rmem_len, &sk->sk_rmem_alloc);
+
+ /*
+ * Mimic the behavior of sk_stream_rfree
+ */
+ sk->sk_forward_alloc += event->rmem_len;
}
sctp_release_sock(newsk);
}
+
/* This proto struct describes the ULP interface for SCTP. */
struct proto sctp_prot = {
.name = "SCTP",
.unhash = sctp_unhash,
.get_port = sctp_get_port,
.obj_size = sizeof(struct sctp_sock),
+ .sysctl_mem = sysctl_sctp_mem,
+ .sysctl_rmem = sysctl_sctp_rmem,
+ .sysctl_wmem = sysctl_sctp_wmem,
+ .memory_pressure = &sctp_memory_pressure,
+ .enter_memory_pressure = sctp_enter_memory_pressure,
+ .memory_allocated = &sctp_memory_allocated,
};
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
.unhash = sctp_unhash,
.get_port = sctp_get_port,
.obj_size = sizeof(struct sctp6_sock),
+ .sysctl_mem = sysctl_sctp_mem,
+ .sysctl_rmem = sysctl_sctp_rmem,
+ .sysctl_wmem = sysctl_sctp_wmem,
+ .memory_pressure = &sctp_memory_pressure,
+ .enter_memory_pressure = sctp_enter_memory_pressure,
+ .memory_allocated = &sctp_memory_allocated,
};
#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
static long sack_timer_min = 1;
static long sack_timer_max = 500;
+/*
+ * Three-element vectors backing the sctp_mem, sctp_rmem and sctp_wmem
+ * entries of sctp_table. Each is defined exactly once here; other
+ * files refer to them through extern declarations.
+ */
+int sysctl_sctp_mem[3];
+int sysctl_sctp_rmem[3];
+
+/*
+ * per assoc memory limitation for sends
+ */
+int sysctl_sctp_wmem[3];
+
static ctl_table sctp_table[] = {
{
.ctl_name = NET_SCTP_RTO_INITIAL,
.extra1 = &sack_timer_min,
.extra2 = &sack_timer_max,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sctp_mem",
+ .data = &sysctl_sctp_mem,
+ .maxlen = sizeof(sysctl_sctp_mem),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sctp_rmem",
+ .data = &sysctl_sctp_rmem,
+ .maxlen = sizeof(sysctl_sctp_rmem),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "sctp_wmem",
+ .data = &sysctl_sctp_wmem,
+ .maxlen = sizeof(sysctl_sctp_wmem),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
{ .ctl_name = 0 }
};
struct sctp_ulpevent *event = NULL;
struct sk_buff *skb;
size_t padding, len;
+ int rx_count;
+
+ /*
+ * check to see if we need to make space for this
+ * new skb, expand the rcvbuffer if needed, or drop
+ * the frame
+ */
+ if (asoc->ep->rcvbuf_policy)
+ rx_count = atomic_read(&asoc->rmem_alloc);
+ else
+ rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
+
+ if (rx_count >= asoc->base.sk->sk_rcvbuf) {
+
+ if ((asoc->base.sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
+ (!sk_stream_rmem_schedule(asoc->base.sk, chunk->skb)))
+ goto fail;
+ }
/* Clone the original skb, sharing the data. */
skb = skb_clone(chunk->skb, gfp);
sctp_ulpq_partial_delivery(ulpq, chunk, gfp);
}
+ sk_stream_mem_reclaim(asoc->base.sk);
return;
}