--- linux-2.6.11-orig/include/linux/sysctl.h	2005-03-02 04:38:10.000000000 -0300
+++ linux-2.6.11/include/linux/sysctl.h	2005-04-12 22:01:39.000000000 -0300
@@ -352,6 +338,7 @@
 	NET_TCP_BIC_BETA=108,
 	NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
 	NET_TCP_CONG_CONTROL=110,
+	NET_TCP_CP_PRIV=111,
 };
 
 enum {
--- linux-2.6.11-orig/include/linux/tcp.h	2005-03-02 04:38:08.000000000 -0300
+++ linux-2.6.11/include/linux/tcp.h	2005-04-12 21:59:35.000000000 -0300
@@ -399,6 +479,10 @@
 	struct tcp_congestion_ops *ca_ops;
 	u32	ca_priv[16];
 #define TCP_CA_PRIV_SIZE	(16*sizeof(u32))
+
+#if defined(CONFIG_TCPCP) || defined(CONFIG_TCPCP_MODULE)
+	__u32		ts_offset;	/* offset from tcp_time_stamp */
+#endif
 };
 
 static inline struct tcp_sock *tcp_sk(const struct sock *sk)
--- linux-2.6.11-orig/include/net/tcp.h	2005-03-02 04:37:48.000000000 -0300
+++ linux-2.6.11/include/net/tcp.h	2005-04-12 21:59:35.000000000 -0300
@@ -219,6 +217,9 @@
 	struct in6_addr		tw_v6_rcv_saddr;
 	int			tw_v6_ipv6only;
 #endif
+#if defined(CONFIG_TCPCP) || defined(CONFIG_TCPCP_MODULE)
+	__u32			ts_offset; /* offset from tcp_time_stamp */
+#endif
 };
 
 static __inline__ void tw_add_node(struct tcp_tw_bucket *tw,
@@ -318,6 +319,8 @@
 extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);
 
+#define tw_time_stamp(tw) tp_time_stamp(tw)
+
 
 /* Socket demux engine toys. */
 #ifdef __BIG_ENDIAN
@@ -846,6 +1033,10 @@
 extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, 
 				     __u16 *mss);
 
+/* tcp_ipv4.c */
+
+extern struct tcp_func ipv4_specific;
+
 /* tcp_output.c */
 
 extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
@@ -1026,6 +1203,12 @@
  */
 #define tcp_time_stamp		((__u32)(jiffies))
 
+#if defined(CONFIG_TCPCP) || defined(CONFIG_TCPCP_MODULE)
+#define tp_time_stamp(tp)	(tcp_time_stamp+(tp)->ts_offset)
+#else
+#define tp_time_stamp(tp)	tcp_time_stamp
+#endif
+
 /* This is what the send packet queueing engine uses to pass
  * TCP per-packet control information to the transmission
  * code.  We also store the host-order sequence numbers in
--- linux-2.6.11-orig/net/ipv4/Kconfig	2005-03-02 04:38:17.000000000 -0300
+++ linux-2.6.11/net/ipv4/Kconfig	2005-04-12 21:59:35.000000000 -0300
@@ -337,6 +187,14 @@
 	  and you should also say Y to "Kernel/User network link driver",
 	  below. If unsure, say N.
 
+config TCPCP
+	tristate "IP: TCP connection passing (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	---help---
+	  Support for retrieving internal information of TCP connections, and
+	  re-creating connections from this information. Highly experimental,
+	  so if in doubt, just say "No".
+
 config SYN_COOKIES
 	bool "IP: TCP syncookie support (disabled per default)"
	---help---
--- linux-2.6.11-orig/net/ipv4/Makefile	2005-03-02 04:37:50.000000000 -0300
+++ linux-2.6.11/net/ipv4/Makefile	2005-04-12 21:59:35.000000000 -0300
@@ -7,7 +7,8 @@
 	     ip_output.o ip_sockglue.o \
 	     tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o tcp_minisocks.o \
 	     datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
-	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o fib_hash.o
+	     sysctl_net_ipv4.o fib_frontend.o fib_semantics.o fib_hash.o \
+	     tcpcp_hooks.o
 
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
@@ -23,6 +24,7 @@
 obj-$(CONFIG_IP_VS) += ipvs/
 obj-$(CONFIG_IP_TCPDIAG) += tcp_diag.o 
 obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o
+obj-$(CONFIG_TCPCP) += tcpcp.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
 		      xfrm4_output.o
--- linux-2.6.11-orig/net/ipv4/sysctl_net_ipv4.c	2005-03-02 04:38:17.000000000 -0300
+++ linux-2.6.11/net/ipv4/sysctl_net_ipv4.c	2005-04-12 21:59:35.000000000 -0300
@@ -48,6 +48,9 @@
 extern int inet_peer_gc_mintime;
 extern int inet_peer_gc_maxtime;
 
+/* From tcpcp.c */
+extern int sysctl_tcpcp_privileged;
+
 #ifdef CONFIG_SYSCTL
 static int tcp_retr1_max = 255; 
 static int ip_local_port_range_min[] = { 1, 1 };
@@ -690,6 +693,16 @@
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+#if defined(CONFIG_TCPCP) || defined(CONFIG_TCPCP_MODULE)
+	{
+		.ctl_name	= NET_TCP_CP_PRIV, 
+		.procname	= "tcpcp_privileged",
+		.data		= &sysctl_tcpcp_privileged,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 
--- linux-2.6.11-orig/net/ipv4/ip_sockglue.c	2005-03-02 04:37:52.000000000 -0300
+++ linux-2.6.11/net/ipv4/ip_sockglue.c	2005-04-12 21:59:35.000000000 -0300
@@ -1087,7 +1087,7 @@
 
 EXPORT_SYMBOL(ip_cmsg_recv);
 
-#ifdef CONFIG_IP_SCTP_MODULE
+#if defined(CONFIG_IP_SCTP_MODULE) || defined(CONFIG_TCPCP_MODULE)
 EXPORT_SYMBOL(ip_getsockopt);
 EXPORT_SYMBOL(ip_setsockopt);
 #endif
--- linux-2.6.11-orig/net/ipv4/tcp.c	2005-03-02 04:37:51.000000000 -0300
+++ linux-2.6.11/net/ipv4/tcp.c	2005-04-12 22:02:55.000000000 -0300
@@ -257,6 +257,7 @@
 #include <linux/fs.h>
 #include <linux/random.h>
 #include <linux/bootmem.h>
+#include <linux/tcpcp.h>
 
 #include <net/icmp.h>
 #include <net/tcp.h>
@@ -2097,6 +2098,15 @@
 		}
 		break;
 
+	case TCP_ICI:
+		err = tcpcp_setici(sk,(const struct tcpcp_ici *) optval,
+		    optlen);
+		break;
+
+	case TCP_CP_FN:
+		err = tcpcp_fn(sk,val);
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -2236,6 +2246,15 @@
 	case TCP_QUICKACK:
 		val = !tp->ack.pingpong;
 		break;
+	case TCP_MAXICISIZE: {
+		int error;
+
+		error = tcpcp_maxicisize(sk,&val);
+		if (error) return error;
+		break;
+	}
+	case TCP_ICI:
+		return tcpcp_getici(sk,(struct tcpcp_ici *) optval,optlen);
 	default:
 		return -ENOPROTOOPT;
 	};
--- linux-2.6.11-orig/net/ipv4/tcp_input.c	2005-03-02 04:38:17.000000000 -0300
+++ linux-2.6.11/net/ipv4/tcp_input.c	2005-04-12 22:04:40.000000000 -0300
@@ -1348,7 +1348,7 @@
 
 static inline int tcp_skb_timedout(struct tcp_sock *tp, struct sk_buff *skb)
 {
-	return (tcp_time_stamp - TCP_SKB_CB(skb)->when > tp->rto);
+	return (tp_time_stamp(tp) - TCP_SKB_CB(skb)->when > tp->rto);
 }
 
 static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
@@ -1978,7 +1978,7 @@
 	 * answer arrives rto becomes 120 seconds! If at least one of segments
 	 * in window is lost... Voila.	 			--ANK (010210)
 	 */
-	seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
+	seq_rtt = tp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
 	tcp_rtt_estimator(tp, seq_rtt);
 	tcp_set_rto(tp);
 	tp->backoff = 0;
@@ -2415,7 +2415,7 @@
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	__u32 now = tcp_time_stamp;
+	__u32 now = tp_time_stamp(tp);
 	int acked = 0;
 	__s32 seq_rtt = -1;
 
@@ -4470,7 +4470,7 @@
 
 		if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 		    !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
-			     tcp_time_stamp)) {
+			     tp_time_stamp(tp))) {
 			NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED);
 			goto reset_and_undo;
 		}
@@ -4947,8 +4947,12 @@
 	return 0;
 }
 
+EXPORT_SYMBOL(sysctl_tcp_timestamps);
+EXPORT_SYMBOL(sysctl_tcp_window_scaling);
+EXPORT_SYMBOL(sysctl_tcp_sack);
 EXPORT_SYMBOL(sysctl_tcp_ecn);
 EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
 EXPORT_SYMBOL(tcp_parse_options);
 EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_rcv_state_process);
--- linux-2.6.11-orig/net/ipv4/tcp_ipv4.c	2005-03-02 04:37:54.000000000 -0300
+++ linux-2.6.11/net/ipv4/tcp_ipv4.c	2005-04-12 21:59:35.000000000 -0300
@@ -1200,7 +1200,7 @@
  */
 
 static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack,
-			    u32 win, u32 ts)
+			    u32 win, u32 ts_out, u32 ts)
 {
 	struct tcphdr *th = skb->h.th;
 	struct {
@@ -1218,7 +1218,7 @@
 		rep.tsopt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 				     (TCPOPT_TIMESTAMP << 8) |
 				     TCPOLEN_TIMESTAMP);
-		rep.tsopt[1] = htonl(tcp_time_stamp);
+		rep.tsopt[1] = htonl(ts_out);
 		rep.tsopt[2] = htonl(ts);
 		arg.iov[0].iov_len = sizeof(rep);
 	}
@@ -1247,7 +1247,8 @@
 	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
 
 	tcp_v4_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
-			tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
+			tw->tw_rcv_wnd >> tw->tw_rcv_wscale,
+			tw_time_stamp(tw), tw->tw_ts_recent);
 
 	tcp_tw_put(tw);
 }
@@ -1255,7 +1256,7 @@
 static void tcp_v4_or_send_ack(struct sk_buff *skb, struct open_request *req)
 {
 	tcp_v4_send_ack(skb, req->snt_isn + 1, req->rcv_isn + 1, req->rcv_wnd,
-			req->ts_recent);
+			tcp_time_stamp, req->ts_recent);
 }
 
 static struct dst_entry* tcp_v4_route_req(struct sock *sk,
--- linux-2.6.11-orig/net/ipv4/tcp_minisocks.c	2005-03-02 04:38:17.000000000 -0300
+++ linux-2.6.11/net/ipv4/tcp_minisocks.c	2005-04-12 21:59:35.000000000 -0300
@@ -377,6 +377,11 @@
 			tw->tw_v6_ipv6only = 0;
 		}
 #endif
+
+#if defined(CONFIG_TCPCP) || defined(CONFIG_TCPCP_MODULE)
+		tw->ts_offset = tp->ts_offset;
+#endif
+
 		/* Linkage updates. */
 		__tcp_tw_hashdance(sk, tw);
 
--- linux-2.6.11-orig/net/ipv4/tcp_output.c	2005-03-02 04:38:38.000000000 -0300
+++ linux-2.6.11/net/ipv4/tcp_output.c	2005-04-12 22:07:22.000000000 -0300
@@ -428,7 +428,7 @@
 
 	if (tcp_snd_test(tp, skb, cur_mss, TCP_NAGLE_PUSH)) {
 		/* Send it out now. */
-		TCP_SKB_CB(skb)->when = tcp_time_stamp;
+		TCP_SKB_CB(skb)->when = tp_time_stamp(tp);
 		tcp_tso_set_push(skb);
 		if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) {
 			sk->sk_send_head = NULL;
@@ -765,7 +765,7 @@
 					break;
 			}
 
-			TCP_SKB_CB(skb)->when = tcp_time_stamp;
+			TCP_SKB_CB(skb)->when = tp_time_stamp(tp);
 			tcp_tso_set_push(skb);
 			if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))
 				break;
@@ -1107,7 +1107,7 @@
 	/* Make a copy, if the first transmission SKB clone we made
 	 * is still in somebody's hands, else make a clone.
 	 */
-	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+	TCP_SKB_CB(skb)->when = tp_time_stamp(tp);
 	tcp_tso_set_push(skb);
 
 	err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
@@ -1315,7 +1315,7 @@
 	/* Send it off. */
 	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
-	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+	TCP_SKB_CB(skb)->when = tp_time_stamp(tp);
 	if (tcp_transmit_skb(sk, skb))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
 }
@@ -1349,7 +1349,7 @@
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ACK;
 		TCP_ECN_send_synack(tcp_sk(sk), skb);
 	}
-	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+	TCP_SKB_CB(skb)->when = tp_time_stamp(tcp_sk(sk));
 	return tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
 }
 
@@ -1412,7 +1412,7 @@
 	/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
 	th->window = htons(req->rcv_wnd);
 
-	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+	TCP_SKB_CB(skb)->when = tp_time_stamp(tp);
 	tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), req->tstamp_ok,
 			      req->sack_ok, req->wscale_ok, req->rcv_wscale,
 			      TCP_SKB_CB(skb)->when,
@@ -1504,7 +1504,7 @@
 	tcp_ca_init(tp);
 
 	/* Send it off. */
-	TCP_SKB_CB(buff)->when = tcp_time_stamp;
+	TCP_SKB_CB(buff)->when = tp_time_stamp(tp);
 	tp->retrans_stamp = TCP_SKB_CB(buff)->when;
 	__skb_queue_tail(&sk->sk_write_queue, buff);
 	sk_charge_skb(sk, buff);
@@ -1600,7 +1600,7 @@
 
 		/* Send it off, this clears delayed acks for us. */
 		TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
-		TCP_SKB_CB(buff)->when = tcp_time_stamp;
+		TCP_SKB_CB(buff)->when = tp_time_stamp(tp);
 		tcp_transmit_skb(sk, buff);
 	}
 }
@@ -1640,7 +1640,7 @@
 	 */
 	TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
-	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+	TCP_SKB_CB(skb)->when = tp_time_stamp(tp);
 	return tcp_transmit_skb(sk, skb);
 }
 
@@ -1680,7 +1680,7 @@
 				tcp_set_skb_tso_segs(skb, tp->mss_cache_std);
 
 			TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
-			TCP_SKB_CB(skb)->when = tcp_time_stamp;
+			TCP_SKB_CB(skb)->when = tp_time_stamp(tp);
 			tcp_tso_set_push(skb);
 			err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
 			if (!err) {
@@ -1734,7 +1734,11 @@
 	}
 }
 
+EXPORT_SYMBOL(tcp_push_one);
+EXPORT_SYMBOL(tcp_current_mss);
 EXPORT_SYMBOL(tcp_connect);
 EXPORT_SYMBOL(tcp_make_synack);
 EXPORT_SYMBOL(tcp_simple_retransmit);
+EXPORT_SYMBOL(tcp_retransmit_skb);
 EXPORT_SYMBOL(tcp_sync_mss);
+EXPORT_SYMBOL(tcp_send_probe0);
--- linux-2.6.11-orig/net/ipv6/tcp_ipv6.c	2005-03-02 04:38:10.000000000 -0300
+++ linux-2.6.11/net/ipv6/tcp_ipv6.c	2005-04-12 21:59:35.000000000 -0300
@@ -1061,7 +1061,8 @@
 	kfree_skb(buff);
 }
 
-static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
+static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
+    u32 ts_out, u32 ts)
 {
 	struct tcphdr *th = skb->h.th, *t1;
 	struct sk_buff *buff;
@@ -1094,7 +1095,7 @@
 		u32 *ptr = (u32*)(t1 + 1);
 		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
 			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
-		*ptr++ = htonl(tcp_time_stamp);
+		*ptr++ = htonl(ts_out);
 		*ptr = htonl(ts);
 	}
 
@@ -1131,14 +1132,16 @@
 	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;
 
 	tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
-			tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
+			tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw_time_stamp(tw),
+			tw->tw_ts_recent);
 
 	tcp_tw_put(tw);
 }
 
 static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
 {
-	tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
+	tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd,
+			tcp_time_stamp, req->ts_recent);
 }
 
 
--- /dev/null	1969-12-31 21:00:00.000000000 -0300
+++ linux-2.6.11/include/linux/tcpcp.h	2005-04-12 21:59:35.000000000 -0300
@@ -0,0 +1,227 @@
+/*
+ * linux/tcpcp.h - TCP connection passing, data structures and prototypes
+ *
+ * Written 2002 by Werner Almesberger
+ */
+
+#ifndef _LINUX_TCPCP_H
+#define _LINUX_TCPCP_H
+
+#ifdef __KERNEL__
+#include <linux/config.h>
+#include <linux/types.h>
+#endif /* __KERNEL__ */
+
+
+/*
+ * The socket option numbers should be in linux/tcp.h, but tcp.h can't be
+ * included by user space, so in order to avoid duplication, they go here.
+ */
+
+#define TCP_MAXICISIZE		13      /* Max. size of Internal Conn. Info */
+#define TCP_ICI			14      /* Retrieve/set Internal Conn. Info */
+#define TCP_CP_FN		15      /* Perform special tcpcp operation */
+
+
+/* TCP_CP_FN sub-function codes */
+#define TCPCP_ACTIVATE		0	/* Activate dormant connection */
+
+/*
+ * For simplicity, ICIs (Internal Connection Information) use a fixed-size
+ * struct, which is followed by the variable-size send and receive buffers.
+ *
+ * All ICI elements are padded to a multiple of four bytes. Numbers are always
+ * in network byte order.
+ *
+ * When extending the ICI, fields that can safely be ignored should be added in
+ * new IEs, before the buffer list, and only the minor version number needs to
+ * be incremented. If the content of existing IEs changes, or if new
+ * information can't be simply ignored, the major version number must be
+ * incremented.
+ *
+ * Should ICI use TLVs (Type-Length-Value), like netlink does ? Well, maybe.
+ * TLVs are more flexible, but they have also a bit more implementation
+ * overhead, and you can't just "print" them from a debugger. So for now,
+ * plain structs are better for development.
+ */
+
+/*
+ * ICIEs represent a more or less arbitrary division of ICI data. The idea
+ * behind dividing this into separate elements is to allow for future
+ * replacements of relatively small blocks, in case kernel data structures
+ * change.
+ */
+
+/* ICI element: ICI version and flags */
+
+struct tcpcp_icie_version {
+	uint8_t major;		/* incompatible structure revision */
+				/*  0: current version */
+	uint8_t minor;		/* compatible structure extension */
+				/*  0: current version */
+	uint8_t ip_version;	/* IP version */
+				/*   4: IPv4 */
+	uint8_t __pad1;
+	uint16_t flags;		/* see TCPCP_ICIF_*, below */
+	uint16_t __pad2;
+	uint16_t ici_hdr_size;	/* sizeof(struct tcpcp_ici) */
+	uint16_t buf_hdr_size;	/* sizeof(struct tcpcp_icie_buf) */
+};
+
+enum {
+	TCPCP_ICIF_USEPERF = 1,	/* use perf. data (tcpcp_set_cong) */
+};
+
+
+/* ICI element: globally unique TCP connection ID */
+
+struct tcpcp_icie_id4 {
+	uint32_t ip_src;	/* source IP address */
+	uint32_t ip_dst;	/* destination IP address */
+};
+
+struct tcpcp_icie_id {
+	union {
+		struct tcpcp_icie_id4 v4; /* IPv4 */
+	} ip;
+	uint16_t tcp_sport;	/* TCP source port */
+	uint16_t tcp_dport;	/* TCP destination port */
+};
+
+
+/* ICI element: fixed general data */
+
+struct tcpcp_icie_fixgen {
+	uint8_t tcp_flags;	/* TCP flags; from linux/tcp.h */
+				/*  1: TCPI_OPT_TIMESTAMPS */
+				/*  2: TCPI_OPT_SACK */
+				/*  4: TCPI_OPT_WSCALE */
+				/*  8: TCPI_OPT_ECN */
+	uint8_t	snd_wscale;	/* send window scale (0 if unused) */
+	uint8_t rcv_wscale;	/* receive window scale (0 if unused) */
+	uint8_t __pad;
+	uint16_t snd_mss;	/* MSS sent */
+	uint16_t rcv_mss;	/* MSS received */
+};
+
+
+/* ICI element: variable general data */
+
+struct tcpcp_icie_vargen {
+ 	uint8_t state;		/* connection state; from linux/tcp.h */
+				/*  1: TCP_ESTABLISHED */
+				/*  2: TCP_SYN_SENT */
+				/*  3: TCP_SYN_RECV */
+				/*  4: TCP_FIN_WAIT1 */
+				/*  5: TCP_FIN_WAIT2 */
+				/*  6: TCP_TIME_WAIT */
+				/*  7: TCP_CLOSE */
+				/*  8: TCP_CLOSE_WAIT */
+				/*  9: TCP_LAST_ACK */
+				/* 10: TCP_LISTEN */
+				/* 11: TCP_CLOSING */
+				/* Note: TCP_ICI may not ever use some of these
+				   values. */
+	uint8_t __pad1;
+	uint8_t __pad2;
+	uint8_t __pad3;
+	uint32_t snd_nxt;	/* sequence number of next new byte to send */
+	uint32_t rcv_nxt;	/* sequence number of next new byte expected to
+				   receive */
+	uint32_t snd_wnd;	/* window received from peer */
+	uint32_t rcv_wnd;	/* window advertized to peer */
+	uint32_t ts_recent;	/* cached timestamp from peer (0 if none) */
+	uint32_t ts_gen;	/* current locally generated timestamp */
+				/* (0 if not using timestamps) */
+};
+
+
+/* ICI element: congestion avoidance data */
+
+struct tcpcp_icie_cong {
+};
+
+
+/* ICI element: connection statistics */
+
+struct tcpcp_icie_stat {
+	/* [0-3]: retransmits
+	 * [4-7]: probes sent
+	 * [8-11]: backoff */
+};
+
+
+/* ICI element: send or receive buffer */
+
+struct tcpcp_icie_buf {
+	/*** These fields must be first and in this order ! ******************/
+	uint8_t type;		/* buffer type (TPCPC_ICIE_BUF_* [sic], below) */
+	uint8_t __pad;                                                     /**/
+	uint16_t length;	/* segment data length                       */
+	/*********************************************************************/
+	uint32_t seq;		/* sequence number of first byte */
+	uint8_t data[0];	/* data, padded to multiple of 4 bytes */
+};
+
+enum {
+	TPCPC_ICIE_BUF_SND = 1,	/* send buffer (only TCP segment, no IP) */
+	TPCPC_ICIE_BUF_OOO = 2,	/* out of order buffer (only TCP segment) */
+};
+
+
+/* Internal Connection Information (ICI) */
+
+struct tcpcp_ici {
+	uint32_t ici_length;	/* total length of ICI */
+	struct tcpcp_icie_version v; /* ICI version and flags */
+	struct tcpcp_icie_id id; /* globally unique TCP connection ID */
+	struct tcpcp_icie_fixgen fixgen; /* fixed general data */
+	struct tcpcp_icie_vargen vargen; /* variable general data */
+	struct tcpcp_icie_cong cong; /* congestion avoidance data */
+	struct tcpcp_icie_stat stat; /* connection statistics */
+	/* ----- ADD NEW IEs HERE ----- */
+	struct tcpcp_icie_buf buf[0];
+};
+
+/*
+ * Buffers are in sequence, first all send, then all out-of-order buffers.
+ * Buffers must not overlap, and may not contain any extraneous data (e.g.
+ * ack'ed bytes, or such). snd_nxt does not have to be at a buffer boundary.
+ */
+
+
+#ifdef __KERNEL__
+
+#if defined(CONFIG_TCPCP) || defined(CONFIG_TCPCP_MODULE)
+
+#include <net/sock.h>
+
+extern int sysctl_tcpcp_privileged;
+
+extern int (*tcpcp_maxicisize_hook)(struct sock *sk,int *size);
+extern int (*tcpcp_getici_hook)(struct sock *sk,struct tcpcp_ici *user_ici,
+    int *user_size);
+extern int (*tcpcp_setici_hook)(struct sock *sk,
+    const struct tcpcp_ici *user_ici,int size);
+extern int (*tcpcp_fn_hook)(struct sock *sk,int fn_code);
+
+void tcpcp_lock_hooks(void);
+void tcpcp_unlock_hooks(void);
+
+int tcpcp_maxicisize(struct sock *sk,int *size);
+int tcpcp_getici(struct sock *sk,struct tcpcp_ici *user_ici,int *user_size);
+int tcpcp_setici(struct sock *sk,const struct tcpcp_ici *user_ici,int size);
+int tcpcp_fn(struct sock *sk,int fn_code);
+
+#else /* defined(CONFIG_TCPCP) || defined(CONFIG_TCPCP_MODULE) */
+
+#define tcpcp_maxicisize(sk,val) (-ENOPROTOOPT)
+#define tcpcp_getici(sk,val,size) (-ENOPROTOOPT)
+#define tcpcp_setici(sk,val,size) (-ENOPROTOOPT)
+#define tcpcp_fn(sk,fn_code) (-ENOPROTOOPT)
+
+#endif /* !defined(CONFIG_TCPCP) && !defined(CONFIG_TCPCP_MODULE) */
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_TCPCP_H */
--- /dev/null	1969-12-31 21:00:00.000000000 -0300
+++ linux-2.6.11/net/ipv4/tcpcp.c	2005-04-16 20:13:00.000000000 -0300
@@ -0,0 +1,1096 @@
+/*
+ * tcpcp.c - TCP connection passing
+ *
+ * Written 2002-2005 by Werner Almesberger
+ */
+
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/net.h>
+#include <linux/tcp.h>
+#include <linux/security.h>
+#include <linux/tcpcp.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+#include <net/inet_common.h>
+#include <asm/uaccess.h>
+
+
+#if 1
+#define DPRINTK(...) printk(KERN_DEBUG __VA_ARGS__)
+#else
+#define DPRINTK(...)
+#endif
+
+#define TCPCP_CURRENT_ICI_MAJOR	0
+#define TCPCP_CURRENT_ICI_MINOR	0
+
+
+/* ----- Get maximum ICI size ---------------------------------------------- */
+
+/* Store an upper bound for the ICI size of connected socket sk in *size. */
+static int __tcpcp_maxicisize(struct sock *sk,int *size)
+{
+	DPRINTK("tcpcp_maxicisize(sk %p,size %p)\n",sk,size);
+	if (sk->sk_socket->state != SS_CONNECTED)
+		return -EBADFD;	/* socket must be in SS_CONNECTED */
+	lock_sock(sk);
+	/*
+	 * Allocations against sk->sk_rcvbuf and sk->sk_sndbuf are made for
+	 * skb->truesize, so they include skb overhead, which is
+	 * significantly bigger than per-buffer overhead in the ICI, so the ICI
+	 * size estimate errs on the safe side.
+	 */
+	*size = sizeof(struct tcpcp_ici)+sk->sk_rcvbuf+sk->sk_sndbuf;
+	release_sock(sk);
+	DPRINTK("  size = %d\n",*size);
+	return 0;
+}
+
+
+/* ----- Get Internal Connection Information (ICI) ------------------------- */
+
+
+/*
+ * No flags ? Indeed, we don't copy TCP flags.
+ *
+ * Since we only dump connections in state ESTABLISHED, we don't have to worry
+ * about segments with SYN, or outbound FIN. Also, any inbound segments with
+ * FIN have not been acknowledged yet (or the connection would be in
+ * CLOSE_WAIT), so we can just ignore the FIN bit, and wait for the
+ * retransmission.
+ *
+ * The PSH flag is of little practical relevance, and easily regenerated in the
+ * outbound direction, so we just ignore it. RST is similar to FIN - if
+ * something was wrong enough to cause RST, it will continue to be wrong when
+ * the retransmission hits. We don't queue outbound RSTs.
+ *
+ * The information of inbound ACKs is already represented in the connection
+ * state, so the ACK data in inbound segments is redundant. Outbound ACKs will
+ * just be re-generated, with up to date information.
+ *
+ * This leaves URG. For now, we just completely ignore that stuff. (@@@)
+ */
+
+/* Append skb as one ICI buffer at *user_buf, then advance *user_buf. */
+static int put_buffer(uint8_t type,struct tcpcp_icie_buf **user_buf,
+    const void *end,struct sk_buff *skb)
+{
+	struct tcpcp_icie_buf buf;
+	int error,length,padded;
+
+	DPRINTK("put_buffer(type %d,user_buf %p,end %p,skb %p\n",
+	    type,*user_buf,end,skb);
+	/* Inefficient, but not a fast path: the data gets copied anyway. */
+	error = skb_linearize(skb,GFP_KERNEL);
+	if (error)
+		return error;
+
+	/* Zero the header first so that __pad can't leak kernel stack. */
+	memset(&buf,0,sizeof(buf));
+	buf.type = type;
+	length = TCP_SKB_CB(skb)->end_seq-TCP_SKB_CB(skb)->seq;
+	padded = (length+3) & ~3;
+	buf.length = htons(length);
+	buf.seq = htonl(TCP_SKB_CB(skb)->seq);
+	DPRINTK("  skb->len %u, length %d, seq %lu\n",skb->len,length,
+	    (unsigned long) ntohl(buf.seq));
+	/* Header and padded data must fit before "end" of the user ICI. */
+	if ((const char *) end-(const char *) (*user_buf)->data < padded)
+		return -EMSGSIZE;
+	if (copy_to_user(*user_buf,&buf,sizeof(buf)))
+		return -EFAULT;
+	if (copy_to_user((*user_buf)->data,skb->tail-length,length))
+		return -EFAULT;
+	*user_buf = (struct tcpcp_icie_buf *) ((*user_buf)->data+padded);
+	return 0;
+}
+
+
+/*
+ * In order to avoid keeping the socket locked "forever", and accumulating a
+ * hefty backlog, we unlock the socket during copies to user space. Changes to
+ * socket variables during these copies don't matter, because we either don't
+ * guarantee atomicity anyway (TCP_ICI), or we have silenced the socket, so no
+ * responses will go to the peer, and the only thing that could change (e.g. if
+ * the user attempts concurrent writes) is buffer content, in which case we
+ * report an error.
+ *
+ * (Probably false) assumption: if anything happens to the send/retransmit
+ * buffer, we see this in a change of the list head/tail.
+ *
+ * If there's no useful way for detecting inconsistencies, maybe we just have
+ * to drop TCP_ICI without shutdown. It's not all that useful anyway ...
+ */
+
+/* Copy every skb on "queue" as a "type" buffer to *buf; sk must be locked. */
+static int copy_queue(struct sock *sk,uint8_t type,struct tcpcp_icie_buf **buf,
+    const void *end,struct sk_buff_head *queue)
+{
+	const struct sk_buff *old_head,*old_tail;
+	struct sk_buff *skb,*clone;
+	int error;
+	/* Returns 0, -ENOMEM, put_buffer's error, or -EBUSY on queue change. */
+	DPRINTK("copy_queue(sk %p,type %d,buf %p->%p,end %p,queue %p\n",sk,
+	    type,buf,*buf,end,queue);
+	old_head = skb_peek(queue);	/* remember both ends to spot changes */
+	old_tail = skb_peek_tail(queue);
+	skb_queue_walk(queue,skb) {
+		clone = skb_clone(skb,GFP_KERNEL);
+		if (!clone)
+			return -ENOMEM;
+		release_sock(sk);	/* unlocked while copying to user space */
+		error = put_buffer(type,buf,end,clone);
+		lock_sock(sk);
+		kfree_skb(clone);
+		if (error) return error;
+		if (old_head != skb_peek(queue) ||
+		    old_tail != skb_peek_tail(queue))
+			return -EBUSY;	/* queue changed while unlocked */
+	}
+	return 0;
+}
+
+/* Fill *ici from sk (locked by caller) and copy its queues to user_ici->buf. */
+static int do_getici(struct sock *sk,struct tcpcp_ici *ici,
+    struct tcpcp_ici *user_ici,int *size)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_sock *inet = inet_sk(sk);
+	struct tcpcp_icie_buf *buf;
+	void *end;
+	int error;
+	/* *size: user buffer size on entry, total ICI length on success. */
+	DPRINTK("do_getici(sk %p,ici %p,user_ici %p,size %p = %d)\n",sk,ici,
+	    user_ici,size,*size);
+	DPRINTK("  sk->sk_send_head %p (seq %lu)\n",sk->sk_send_head,
+	    sk->sk_send_head ?
+	    (unsigned long) TCP_SKB_CB(sk->sk_send_head)->seq : 0);
+	DPRINTK("  tp->snd_una = %lu, tp->snd_sml = %lu\n",
+	    (unsigned long) tp->snd_una,(unsigned long) tp->snd_sml);
+	DPRINTK("  tp->pushed_seq = %lu, skb->copied_seq = %lu\n",
+	    (unsigned long) tp->pushed_seq,(unsigned long) tp->copied_seq);
+	DPRINTK("  tp->snd_wnd = %lu, tp->rcv_wnd = %lu\n",
+	    (unsigned long) tp->snd_wnd,(unsigned long) tp->rcv_wnd);
+	DPRINTK("  tp->packets_out %lu, sk->sk_wmem_alloc %lu\n",
+	    (unsigned long) tp->packets_out,
+	    (unsigned long) atomic_read(&sk->sk_wmem_alloc));
+	DPRINTK("  sk->sk_wmem_alloc %lu, sk->sk_wmem_queued %lu, "
+	    "sk->sk_sndbuf %lu\n",
+	    (unsigned long) atomic_read(&sk->sk_wmem_alloc),
+	    (unsigned long) sk->sk_wmem_queued,(unsigned long) sk->sk_sndbuf);
+
+	memset(ici,0,sizeof(*ici));
+		/* make sure padding bytes don't leak data */
+
+	/* version */
+	ici->v.major = TCPCP_CURRENT_ICI_MAJOR;
+	ici->v.minor = TCPCP_CURRENT_ICI_MINOR;
+	ici->v.ip_version = 4;
+	ici->v.flags = 0;
+	ici->v.ici_hdr_size = htons(sizeof(struct tcpcp_ici));
+	ici->v.buf_hdr_size = htons(sizeof(struct tcpcp_icie_buf));
+
+	/* globally unique TCPv4 connection ID */
+	ici->id.ip.v4.ip_src = inet->saddr;
+	ici->id.ip.v4.ip_dst = inet->daddr;
+	ici->id.tcp_sport = inet->sport;
+	ici->id.tcp_dport = inet->dport;
+
+	/* fixed general data */
+	ici->fixgen.tcp_flags = 0;
+	if (tp->rx_opt.tstamp_ok)
+		ici->fixgen.tcp_flags |= TCPI_OPT_TIMESTAMPS;
+	if (tp->rx_opt.sack_ok)
+		ici->fixgen.tcp_flags |= TCPI_OPT_SACK;
+	if (tp->rx_opt.wscale_ok) {
+		ici->fixgen.tcp_flags |= TCPI_OPT_WSCALE;
+		ici->fixgen.snd_wscale = tp->rx_opt.snd_wscale;
+		ici->fixgen.rcv_wscale = tp->rx_opt.rcv_wscale;
+	}
+	else {
+		ici->fixgen.snd_wscale = 0;
+		ici->fixgen.rcv_wscale = 0;
+	}
+	if (tp->ecn_flags & TCP_ECN_OK)
+		ici->fixgen.tcp_flags |= TCPI_OPT_ECN;
+	ici->fixgen.snd_mss = htons(tp->advmss);
+	ici->fixgen.rcv_mss = htons(tp->rx_opt.mss_clamp);
+
+	/* variable general data */
+	ici->vargen.state = sk->sk_state;
+	ici->vargen.snd_nxt = htonl(tp->snd_nxt);
+	ici->vargen.rcv_nxt = htonl(tp->rcv_nxt);
+	ici->vargen.snd_wnd = htonl(tp->snd_wnd);
+	ici->vargen.rcv_wnd = htonl(tcp_receive_window(tp));
+	ici->vargen.ts_gen = htonl(tp_time_stamp(tp));
+	ici->vargen.ts_recent =
+	    htonl(tp->rx_opt.saw_tstamp ? tp->rx_opt.ts_recent : 0);
+	/* NOTE(review): ts_recent keys off saw_tstamp, tcp_flags off tstamp_ok. */
+	/* copy buffers */
+	buf = (struct tcpcp_icie_buf *) &user_ici->buf;	/* user-space address */
+	end = (char *) user_ici+*size;
+	error = copy_queue(sk,TPCPC_ICIE_BUF_SND,&buf,end,&sk->sk_write_queue);
+	if (error)
+		return error;
+	error = copy_queue(sk,TPCPC_ICIE_BUF_OOO,&buf,end,
+	   &tp->out_of_order_queue);
+	if (error)
+		return error;
+
+	*size = (char *) buf-(char *) user_ici;	/* header plus all buffers */
+	ici->ici_length = htonl(*size);
+	DPRINTK("  size = %d\n",*size);
+	return 0;
+}
+
+
+/* Build the ICI of sk in user_ici; *user_size: in = room, out = used. */
+static int do_tcpcp_getici(struct sock *sk,struct tcpcp_ici *user_ici,
+  int *user_size)
+{
+	struct tcpcp_ici *ici;
+	int size,error;
+
+	DPRINTK("do_tcpcp_getici(sk %p,user_ici %p,user_size %p)\n",sk,
+	    user_ici,user_size);
+	if (get_user(size,user_size))
+		return -EFAULT;
+	/* size is signed: reject negatives before the unsigned comparison */
+	if (size < 0 || size < sizeof(struct tcpcp_ici))
+		return -EMSGSIZE;
+	ici = kmalloc(sizeof(struct tcpcp_ici),GFP_KERNEL);
+	if (!ici)
+		return -ENOMEM;
+
+	lock_sock(sk);
+	error = do_getici(sk,ici,user_ici,&size);
+	release_sock(sk);
+
+	if (!error && put_user(size,user_size))
+		error = -EFAULT;
+	if (!error && copy_to_user(user_ici,ici,sizeof(*ici)))
+		error = -EFAULT;
+
+	kfree(ici);
+	return error;
+}
+
+
+/* ----- Get Internal Connection Information (ICI), plus shutdown ---------- */
+
+
+/*
+ * FFS: don't filter, but reconstruct any changes possibly caused by new
+ * packets on the fly. shouldn't be too hard, because in most cases, we
+ * put them into the backlog anyway. @@@
+ *
+ * Advantages:
+ * - no dependency on CONFIG_FILTER
+ * - no conflict with obscure uses of CONFIG_FILTER
+ *
+ * What can happen ?
+ * - we get moved from ESTABLISHED to CLOSE_WAIT
+ * - segments move from ooo to receive buffer
+ * - overlapping segments get recombined (aiee!)
+ * - new segments (ACK, window probe) get added to send buffer (are they
+ *   queued ?)
+ * - user may add/remove data, but that's a violation of the stability
+ *   requirement, and we just quit in this case
+ */
+/* Empty filter that __tcpcp_getici installs to drop all inbound packets. */
+static struct sk_filter drop_all = {
+	.refcnt = ATOMIC_INIT(1),	/* never kfree this ... */
+	.len = 0,			/* return 0 */
+};
+
+
+static struct tcp_func tcpcp_af_specific;
+
+/* Frees skb without sending it and returns 0; presumably a queue_xmit stub. */
+static int tcpcp_discard_xmit(struct sk_buff *skb,int ipfragok)
+{
+	DPRINTK("tcpcp_discard_xmit(skb %p)\n",skb);
+	kfree_skb(skb);
+	return 0;
+}
+
+
+/*
+ * NOTE: backlog is lost. There's probably not much there anyway ...
+ * (At least not in a way that would be particularly useful for us - i.e.
+ * segments accumulated while we're holding the socket lock would be pretty
+ * hard to use.)
+ *
+ * NOTE: also the error queue is lost. The data in there isn't guaranteed
+ * anyway, and actual uses of this with TCP seem to be rare.
+ */
+/* Silence sk (drop input, discard output), then hand its ICI to user space. */
+static int __tcpcp_getici(struct sock *sk,struct tcpcp_ici *user_ici,
+    int *user_size)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int error;
+	/* Returns 0 or a negative errno; -EBADFD unless socket is SS_CONNECTED. */
+	DPRINTK("tcpcp_getici(sk %p,user_ici %p,user_size %p)\n",sk,user_ici,
+	    user_size);
+
+	/*
+	 * @@@ Allow TCP states SYN_RCVD, SYN_SENT, ESTABLISHED, and
+	 * CLOSE_WAIT. May also allow accept-but-no-SYNACK state, FFS.
+	 */
+	if (sk->sk_socket->state != SS_CONNECTED)
+		return -EBADFD;
+
+	error = sock_error(sk);	/* report an already pending socket error */
+	if (error)
+		return error;
+	lock_sock(sk);
+
+	/*
+	 * Set up dummy socket filter that just drops all inbound packets.
+	 */
+	if (sk->sk_filter) sk_filter_release(sk,sk->sk_filter);
+	sk->sk_filter = &drop_all;
+	sk_filter_charge(sk,sk->sk_filter);
+
+	/*
+	 * Prevent socket from sending by inserting a fake set of AF-specific
+	 * functions. Very ugly, of course.
+	 */
+	tp->af_specific = &tcpcp_af_specific;
+
+	/*
+	 * Since our socket is now deaf and mute, there's no point in lingering
+	 */
+	sock_set_flag(sk,SOCK_LINGER);
+	sk->sk_lingertime = 0;
+
+	release_sock(sk);
+	/* The sock lock is dropped here; do_tcpcp_getici takes it again. */
+	error = do_tcpcp_getici(sk,user_ici,user_size);
+	if (error)
+		return error;	/* note: sk stays silenced on this path */
+
+	/*
+	 * Something may have set sk->sk_err while we've been copying data. If
+	 * so, return it now.
+	 */
+	return sock_error(sk);
+}
+
+
+/* ----- Set Internal Connection Information (ICI) ------------------------- */
+
+
+/*
+ * Instead of populating every little field "manually", we choose the more
+ * drastic but ultimately probably more reliable approach of going through as
+ * much of the normal connection setup process as possible. This bears the
+ * following risks:
+ *
+ *  - fields may be overlooked, and initialized to invalid values
+ *  - there may be side-effects (e.g. spurious packet emission)
+ *  - cleanup after tcp_v4_connect is kind of messy
+ *
+ * However, it has the following benefits:
+ *
+ *  - fields aren't likely to be left uninitialized
+ *  - most sanity checks (consistency, compatibility, security, etc.) are
+ *    performed, including ones that may be added in the future
+ *  - tcpcp has a lot fewer explicit dependencies on the rest of the TCP code
+ *  - tcpcp code is simpler
+ *
+ * We could possibly trade the messy cleanup for bad karma by also faking the
+ * SYN+ACK.
+ */
+
+
+/*
+ * tcpcp_compatibility checks that a connection with the characteristics
+ * described in the ICI is compatible with the functionality available on the
+ * local system. If not, the operation fails. (E.g. if SACK is disabled here,
+ * we cannot negotiate out of using it.)
+ */
+
+static int tcpcp_compatibility(struct sock *sk,const struct tcpcp_ici *ici)
+{
+	uint16_t flags = ntohs(ici->v.flags);
+
+	DPRINTK("tcpcp_compatibility(sk %p,ici %p,flags %u)\n",sk,ici,
+	    (unsigned) flags);
+
+	if ((flags & TCPCP_ICIF_USEPERF) && !capable(CAP_NET_RAW))
+		return -EPERM;	/* USEPERF is only honoured with CAP_NET_RAW */
+
+	if ((ici->fixgen.tcp_flags & TCPI_OPT_TIMESTAMPS) &&
+	    !sysctl_tcp_timestamps)
+		return -ENOPROTOOPT;
+	if ((ici->fixgen.tcp_flags & TCPI_OPT_SACK) && !sysctl_tcp_sack)
+		return -ENOPROTOOPT;
+	if ((ici->fixgen.tcp_flags & TCPI_OPT_WSCALE) &&
+	    !sysctl_tcp_window_scaling)
+		return -ENOPROTOOPT;
+	if ((ici->fixgen.tcp_flags & TCPI_OPT_ECN) && !sysctl_tcp_ecn)
+		return -ENOPROTOOPT;
+
+	/* @@@ not checked yet: MSS compatibility ? */
+	/* @@@ max. window >= advertized window ? (considering pending data) */
+	return 0;
+}
+
+
+/*
+ * tcpcp_bind just binds the socket, plain and simple. Binding does not leave
+ * any nasty surprises like running timers, so we don't need to worry about
+ * locking at this point. (inet_bind does its own locking.)
+ */
+
+static int tcpcp_bind(struct sock *sk,const struct tcpcp_ici *ici)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct sockaddr_in addr;
+
+	DPRINTK("tcpcp_bind(sk %p,ici %p)\n",sk,ici);
+	/*
+	 * In some cases, we may not want to use the same address/port
+	 * combination as in the original connection, e.g. if the connection
+	 * switching is done by a NAT.
+	 * Users can manipulate the source and destination address/port pairs
+	 * directly in the ICI. However, this would not allow them to let the
+	 * kernel look up the local port, without also reserving it. In order
+	 * to support that, setici simply skips the bind step if the socket is
+	 * already bound, which is detected by a non-zero inet->rcv_saddr.
+	 * NOTE(review): a bind to the wildcard address may not be seen - confirm.
+	 */
+	if (inet->rcv_saddr)
+		return 0;
+
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = ici->id.ip.v4.ip_src;	/* used as is */
+	addr.sin_port = ici->id.tcp_sport;		/* no byte swap */
+	return inet_bind(sk->sk_socket,(struct sockaddr *) &addr,sizeof(addr));
+}
+
+/* Copy one send buffer from user space into a new skb and append it to sk_write_queue. */
+static int tcpcp_send_buf(struct sock *sk,void *data,uint32_t seq,int length)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	int error = 0;
+
+	DPRINTK("tcpcp_send_buf(sk %p,data %p,seq %lu,length %d)\n",sk,data,
+	    (unsigned long) seq,length);
+
+	/*
+	 * @@@ check sequence numbers
+	 */
+
+	skb = alloc_skb(length+MAX_TCP_HEADER,GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+	skb_reserve(skb,MAX_TCP_HEADER);
+	skb->csum = csum_and_copy_from_user(data,skb_put(skb,length),length,0,
+	    &error);
+	if (error) {
+		kfree_skb(skb);
+		return error;
+	}
+
+	/*
+	 * Do what tcp.c:skb_entail does ...
+	 */
+	TCP_SKB_CB(skb)->seq = seq;
+	TCP_SKB_CB(skb)->end_seq = seq+length;
+	TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; /* @@@ sure ? */
+	TCP_SKB_CB(skb)->sacked = 0;
+	sk_charge_skb(sk,skb);
+	__skb_queue_tail(&sk->sk_write_queue,skb);
+
+	/*
+	 * @@@ Can send_head point to buffers with partially new and partially
+	 * old data ? If not, and somebody feeds us with such buffers (If they
+	 * come from a Linux kernel, send_head rules should be compatible. That
+	 * is, unless someone changed the rules.), we need to split the skb.
+	 */
+
+	if (between(tp->snd_nxt,seq,seq+length-1))
+		sk->sk_send_head = skb;	/* snd_nxt falls into this buffer */
+	if (!sk->sk_send_head)
+	    	tp->packets_out += tcp_skb_pcount(skb);
+	return 0;
+}
+
+/* Handle an out-of-order buffer from the ICI: it is ignored, and 0 is returned. */
+static int tcpcp_ooo_buf(struct sock *sk,void *data,uint32_t seq,int length)
+{
+	DPRINTK("tcpcp_ooo_buf(sk %p,data %p,seq %lu,length %d)\n",sk,data,
+	    (unsigned long) seq,length);
+
+	/*
+	 * Lazy bastard trick #666: TCP doesn't actually require us to preserve
+	 * OOO data, so we don't. Ha, that was easy :-)
+	 */
+	return 0;
+
+
+#if 0
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+
+	/*
+	 * Make sure we don't get excessively unreasonable data ...
+	 * NOTE(review): the "<" below looks inverted - confirm before use.
+	 */
+	if (atomic_read(&sk->sk_rmem_alloc)+length < sk->sk_rcvbuf)
+		return -ENOBUFS;
+	skb = alloc_skb(length,GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+/*
+ * when populating buffers, remember skb->cb (see tcp_ipv4.c:tcp_v4_rcv) ! ???
+ * .. and tcp.c:skb_entail
+ */
+	skb->dev =
+	skb->nh.iph =
+	skb->th.tcp =
+
+	if (copy_from_user(skb->h.th,data,length)) {
+		kfree_skb(skb);
+		return -EFAULT;
+	}
+	/*
+	 * Packets in the out-of-order queue are already checksummed, so we
+	 * probably don't need to re-checksum everything. (FFS @@@)
+	 */
+	tcp_data_queue(sk,skb);
+	if (skb_peek(&tp->out_of_order_queue) != skb) {
+		printk(KERN_DEBUG "tcpcp_ooo_buf: skb didn't show up in "
+		    "queue\n");
+		return -ENOBUFS;
+	}
+
+	return 0;
+#endif
+}
+
+/* Walk the buffer elements from user_ici->buf up to user_ici+size and process each one. */
+static int tcpcp_buffers(struct sock *sk,const struct tcpcp_ici *ici,
+    const struct tcpcp_ici *user_ici,int size)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct tcpcp_icie_buf *buf = user_ici->buf;
+	void *end = (char *) user_ici+size;
+	int first_send = 1;
+
+	DPRINTK("tcpcp_buffers(sk %p,ici %p,user_ici %p,size %d)\n",sk,ici,
+	    user_ici,size);
+/*
+ * set up sequence numbers so that tcp_data_queue works @@@
+ */
+	tp->snd_nxt = tp->snd_una = tp->write_seq = ntohl(ici->vargen.snd_nxt);
+/*
+ * BEWARE!! tcp_output.c:tcp_connect_init resets lots of variables, including
+ * err, snd_una, snd_sml, rcv_nxt, rcv_wup, and copied_seq
+ */
+	DPRINTK("  tp->rcv_nxt %lu\n",(unsigned long) tp->rcv_nxt);
+
+	while (buf != end) {
+		struct tcpcp_icie_buf buf_dsc;
+		int length,error;
+		uint32_t seq;
+
+		DPRINTK("  (buf %p, end %p)\n",buf,end);
+		if ((void *) buf->data > end)	/* descriptor must fit */
+			return -EFAULT;
+		if (copy_from_user(&buf_dsc,buf,sizeof(*buf)))
+			return -EFAULT;
+		length = ntohs(buf_dsc.length);
+		seq = ntohl(buf_dsc.seq);
+		DPRINTK("  (type %d,length %d,seq %lu)\n",buf_dsc.type,length,
+		    (unsigned long) seq);
+		switch (buf_dsc.type) {
+			case TPCPC_ICIE_BUF_SND:
+				if (first_send) {
+					tp->snd_una = seq;
+					first_send = 0;
+				}
+				else {
+					if (sk->sk_send_head) {
+						if (seq != tp->write_seq)
+							return -EINVAL;
+					}
+					else {
+						if (before(seq,tp->write_seq))
+							return -EINVAL;
+					}
+				}
+				tp->write_seq = seq+length;
+				error = tcpcp_send_buf(sk,buf->data,seq,
+				    length);
+				break;
+			case TPCPC_ICIE_BUF_OOO:
+				error = tcpcp_ooo_buf(sk,buf->data,seq,length);
+				break;
+			default:
+				return -EINVAL;
+		}
+		if (error)
+			return error;
+		/* the next descriptor follows the data, padded to 4 bytes */
+		buf = (struct tcpcp_icie_buf *) (buf->data+((length+3) & ~3));
+	}
+	if (!between(tp->snd_nxt,tp->snd_una,tp->write_seq))
+		return -EINVAL;
+	return 0;
+}
+
+
+/*
+ * tcpcp_connect walks TCP through to the SYN_SENT state. Since we don't want
+ * TCP to really emit a SYN packet, we "mute" the socket during all this. Note
+ * that the socket is locked, and will remain so, until after we've forced the
+ * socket into full compliance with the ICI.
+ */
+
+/*
+ * control enabling of SACK, wscale, etc. @@@
+ */
+
+static int tcpcp_connect(struct sock *sk,const struct tcpcp_ici *ici)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sockaddr_in addr;
+	struct sk_buff *skb;
+	uint32_t old_snd_una = tp->snd_una;	/* restored further below */
+	uint8_t flags;
+	int error;
+
+	DPRINTK("tcpcp_connect(sk %p,ici %p)\n",sk,ici);
+	/*
+	 * "Mute" socket
+	 */
+	tp->af_specific = &tcpcp_af_specific;
+
+	tp->rx_opt.mss_clamp = ntohs(ici->fixgen.rcv_mss);
+	tp->advmss = ntohs(ici->fixgen.snd_mss);
+
+	/* @@@ set up mss, sequence, timestamps?, etc. */
+
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = ici->id.ip.v4.ip_dst;
+	addr.sin_port = ici->id.tcp_dport;
+	error = tcp_v4_connect(sk,(struct sockaddr *) &addr,sizeof(addr));
+	if (error)
+		return error;
+
+	/*
+	 * Get rid of the SYN segment enqueued by tcp_v4_connect
+	 */
+	skb = __skb_dequeue_tail(&sk->sk_write_queue);
+	if (!skb) {
+		printk(KERN_ERR "tcpcp_connect: tcp_v4_connect did not enqueue "
+		    "anything\n");
+		return -EINVAL;
+	}
+	flags = TCP_SKB_CB(skb)->flags;
+	if (!(flags & TCPCB_FLAG_SYN) || (flags & TCPCB_FLAG_ACK)) {
+		printk(KERN_ERR "tcpcp_connect: tcp_v4_connect did not enqueue "
+		    "SYN segment (0x%02x)\n",flags);
+		error = -EINVAL;	/* still clean up below, then fail */
+	}
+	if (sk->sk_send_head == skb)
+		sk->sk_send_head = NULL;
+	tp->packets_out -= tcp_skb_pcount(skb);
+
+	/*
+	 * Undo sequence number change for (discarded) SYN bit
+	 */
+	tp->write_seq--;
+	tp->snd_nxt--;
+        tp->pushed_seq--;
+
+	/*
+	 * And, of course, tcp_connect also changes snd_una ...
+	 */
+	tp->snd_una = old_snd_una;
+
+	sk_stream_free_skb(sk,skb);
+	tcp_clear_xmit_timers(sk);
+	return error;
+}
+
+
+/*
+ * tcpcp_fixup adjusts all variables such that the connection looks like the
+ * one described in the ICI. At the end of tcpcp_fixup, the connection is in
+ * a valid state and fully operational.
+ */
+
+static int tcpcp_fixup(struct sock *sk,const struct tcpcp_ici *ici)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	DPRINTK("tcpcp_fixup(sk %p,ici %p)\n",sk,ici);
+	DPRINTK("  (1) tp->mss_clamp %lu, tp->mss_cache %lu\n",
+	    (unsigned long) tp->rx_opt.mss_clamp,
+	    (unsigned long) tp->mss_cache);
+	tp->rcv_nxt = tp->copied_seq = tp->rcv_wup = tp->snd_wl1 =
+	    ntohl(ici->vargen.rcv_nxt);
+	tp->snd_wnd = tp->max_window = ntohl(ici->vargen.snd_wnd);
+	tp->rcv_wnd = ntohl(ici->vargen.rcv_wnd);
+	DPRINTK("  (1) sk->sk_rcvbuf %lu, sk->sk_sndbuf %lu\n",
+	    (unsigned long) sk->sk_rcvbuf,(unsigned long) sk->sk_sndbuf);
+
+	/* @@@ should detect these a little earlier ... */
+	if (tp->rcv_wnd > tcp_space(sk)) {
+		int i;
+
+		if (tp->rcv_wnd > tcp_win_from_space(sysctl_tcp_rmem[2]))
+			return -ENOBUFS;
+		/*
+		 * tcp_win_from_space^-1, found by trial: OR bit i (30..0) into
+		 * sk_rcvbuf whenever the trial window stays <= rcv_wnd. Easier than
+		 * sk->sk_rcvbuf = tp->rcv_wnd << -sysctl_tcp_adv_win_scale,
+		 * and
+		 * sk->sk_rcvbuf = tp->rcv_wnd/(1-2^-sysctl_tcp_adv_win_scale)
+		 */
+		for (i = 30; i >= 0; i--) {
+			if (tp->rcv_wnd >=
+			    tcp_win_from_space((sk->sk_rcvbuf-
+			    atomic_read(&sk->sk_rmem_alloc)) | (1 << i)))
+				sk->sk_rcvbuf |= 1 << i;
+		}
+		if (tp->rcv_wnd > tcp_space(sk))
+			sk->sk_rcvbuf++;
+	}
+	if (sk->sk_sndbuf < sk->sk_wmem_queued) {
+		if (sk->sk_wmem_queued > sysctl_tcp_wmem[2])
+			return -ENOBUFS;
+		sk->sk_sndbuf = sk->sk_wmem_queued;
+		/*
+		 * @@@ better ?: export tcp_input.c:tcp_fixup_sndbuf and use
+		 * that
+		 */
+	}
+	DPRINTK("  (2) sk->sk_rcvbuf %lu, sk->sk_sndbuf %lu\n",
+	    (unsigned long) sk->sk_rcvbuf,(unsigned long) sk->sk_sndbuf);
+
+	tp->rx_opt.tstamp_ok = !!(ici->fixgen.tcp_flags & TCPI_OPT_TIMESTAMPS);
+	tp->rx_opt.sack_ok = !!(ici->fixgen.tcp_flags & TCPI_OPT_SACK);
+	tp->rx_opt.wscale_ok = !!(ici->fixgen.tcp_flags & TCPI_OPT_WSCALE);
+	tp->ecn_flags = ici->fixgen.tcp_flags & TCPI_OPT_ECN ? TCP_ECN_OK : 0;
+	tp->tcp_header_len = sizeof(struct tcphdr);
+	if (tp->rx_opt.tstamp_ok)
+		tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
+	tp->rx_opt.snd_wscale = ici->fixgen.snd_wscale;
+	tp->rx_opt.rcv_wscale = ici->fixgen.rcv_wscale;
+
+	if (tp->rx_opt.tstamp_ok && ici->vargen.ts_recent) {
+		/*
+		 * like tcp_input.c:tcp_store_ts_recent
+		 */
+		tp->rx_opt.ts_recent = ntohl(ici->vargen.ts_recent);
+		tp->rx_opt.ts_recent_stamp = xtime.tv_sec;
+	}
+	tp->ts_offset = ntohl(ici->vargen.ts_gen)-tcp_time_stamp;
+
+	tp->rx_opt.mss_clamp = ntohs(ici->fixgen.rcv_mss);
+	tcp_sync_mss(sk,dst_pmtu(__sk_dst_get(sk)));
+	DPRINTK("  (2) tp->mss_clamp %lu, tp->mss_cache %lu\n",
+	    (unsigned long) tp->rx_opt.mss_clamp,
+	    (unsigned long) tp->mss_cache);
+	
+	sk->sk_socket->state = SS_CONNECTED;
+	tcp_set_state(sk,TCP_ESTABLISHED);
+
+	/*
+	 * Ready to go ! Unmute the socket by restoring the IPv4 functions.
+	 */
+	tp->af_specific = &ipv4_specific;
+	return 0;
+}
+
+/* Check the ICI, then bind, refill, connect and fix up the socket; returns 0 or -errno. */
+static int do_setici(struct sock *sk,const struct tcpcp_ici *ici,
+    const struct tcpcp_ici *user_ici,int size)
+{
+	int error;
+
+	DPRINTK("do_setici(sk %p,ici %p,user_ici %p,size %d)\n",
+	    sk,ici,user_ici,size);
+	if (ici->v.major != TCPCP_CURRENT_ICI_MAJOR)
+		return -EINVAL;
+	if (size < (int) sizeof(*ici) || size != ntohl(ici->ici_length))
+		return -EINVAL;	/* (int): a negative size must fail too */
+	error = tcpcp_compatibility(sk,ici);	/* local feature check */
+	if (error)
+		return error;
+	error = tcpcp_bind(sk,ici);		/* does its own locking */
+	if (error)
+		return error;
+	error = tcpcp_buffers(sk,ici,user_ici,size); /* reads user_ici */
+	if (error)
+		return error;
+	lock_sock(sk); /* weird races may happen */
+	error = tcpcp_connect(sk,ici);
+	if (!error)
+		error = tcpcp_fixup(sk,ici);
+
+	/*
+	 * In case of an error, don't make the poor socket linger ...
+	 */
+	if (error) {
+		sock_set_flag(sk,SOCK_LINGER);
+		sk->sk_lingertime = 0;
+	}
+	release_sock(sk);
+	return error;
+}
+
+/* Set the ICI of an unconnected TCP/IPv4 socket from user space; returns 0 or -errno. */
+static int __tcpcp_setici(struct sock *sk,const struct tcpcp_ici *user_ici,
+    int size)
+{
+	struct tcpcp_ici *ici;
+	int error = 0;
+
+	DPRINTK("tcpcp_setici(sk %p,user_ici %p,size %d)\n",sk,user_ici,size);
+	if (sysctl_tcpcp_privileged && !capable(CAP_NET_RAW))
+		return -EPERM;
+	if (sk->sk_family != PF_INET)
+		return -EPFNOSUPPORT;
+	if (sk->sk_type != SOCK_STREAM)
+		return -ESOCKTNOSUPPORT;
+	if (sk->sk_protocol != IPPROTO_TCP)
+		return -EPROTONOSUPPORT;
+	/* @@@ disconnect/kill socket if not unconnected ? */
+	if (sk->sk_socket->state != SS_UNCONNECTED)
+		return -EBADFD;
+
+#ifndef CODE_IS_MATURE
+	/*
+	 * Fine, we don't care or even know about compatibility for now.
+	 * So, anything that doesn't fit _exactly_, just gets thrown away.
+	 * Later on, we should leave this decision to do_setici.
+	 */
+	if (size < (int) sizeof(struct tcpcp_ici))
+		return -EINVAL;	/* (int): a negative size must fail too */
+#endif
+	if (size < (int) sizeof(struct tcpcp_icie_version))
+		return -EINVAL;	/* (int): a negative size must fail too */
+		/* wrong - need to consider fixed-size prefix @@@ */
+		/* use &version[1] */
+	/*
+	 * Various functions called by do_setici will attempt to lock the
+	 * socket, so we must undo tcp_setsockopt's locking here.
+	 *
+	 * We need to take into account that users may attempt to change the
+	 * socket state underneath us, so at least we shouldn't oops in such
+	 * cases. (FFS) @@@
+	 */
+	release_sock(sk);
+	ici = kmalloc(sizeof(struct tcpcp_ici),GFP_KERNEL);
+	if (!ici) {
+		error = -ENOMEM;
+		goto out;
+	}
+	/* Only the fixed-size part is copied; do_setici reads the rest. */
+	if (copy_from_user(ici,user_ici,
+	    size > sizeof(struct tcpcp_ici) ? sizeof(struct tcpcp_ici) : size))
+		error = -EFAULT;
+	else error = do_setici(sk,ici,user_ici,size);
+out:
+	lock_sock(sk);	/* the caller expects the socket to be locked again */
+	kfree(ici);
+	return error;
+}
+
+
+/* ----- Activate dormant connection (kick it) ----------------------------- */
+
+
+/*
+ * Check that the connection isn't doing anything. In particular, we want to
+ * keep people from successfully kicking "normal" connections.
+ *
+ * The indicators we use are timers and whether we've received an ACK since
+ * resurrecting the connection. The latter is needed, because timers are also
+ * stopped if all outbound data has been ACKed, and we don't have any pending
+ * delayed ACKs.
+ *
+ * @@@ The tp->rcv_tstamp test fails every once in a while, and can probably
+ * be exploited for emitting a burst of restart segments. Need to also check
+ * a reliable indicator that data has been sent.
+ */
+
+static int check_tcp_idle(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	int busy;
+
+	DPRINTK("check_tcp_idle(sk %p,tp %p)\n",sk,tp);
+	DPRINTK("  tp->pending %d, tp->ack.pending %d\n",tp->pending,
+	    tp->ack.pending);
+	DPRINTK("  tp->ack.lrcvtime %lu, tp->rcv_tstamp %lu\n",
+	    (unsigned long) tp->ack.lrcvtime,(unsigned long) tp->rcv_tstamp);
+
+	/* Any of the three indicators being set means "not idle". */
+	busy = tp->pending || tp->ack.pending || tp->rcv_tstamp;
+	return busy ? -EALREADY : 0;
+}
+
+
+/*
+ * tcpcp_restart tries to get data to flow. The socket is unmuted, but still
+ * locked, so tcpcp_restart can perform all operations ordinary TCP uses.
+ */
+
+static void tcpcp_restart(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+
+	DPRINTK("tcpcp_restart(sk %p)\n",sk);
+	DPRINTK("  tp->rcv_nxt %lu, tp->rcv_wup %lu\n",
+	    (unsigned long) tp->rcv_nxt, (unsigned long) tp->rcv_wup);
+
+	/*
+	 * If there is no data to send, all we can do is emit an ACK for the
+	 * last segment received from the peer. The usefulness of this is
+	 * probably marginal, since a single ACK won't be enough to cause Fast
+	 * Retransmit, and there shouldn't be any unACKed in-order data at this
+	 * moment. The only case where this ACK is useful is if we have shrunk
+	 * our window to 0.
+	 */
+	skb = skb_peek(&sk->sk_write_queue);
+	if (!skb) {
+		DPRINTK("  Empty write queue -> ACK\n");
+		/*
+		 * Actually, I lied. For now, we're just silent :-) @@@
+		 */
+		/* SEEMS TO WORK */
+		return;
+	}
+
+	/*
+	 * If we have data to retransmit, we will just do so now. This may
+	 * cause unnecessary retransmission. FFS @@@.
+	 */
+	if (sk->sk_send_head != skb) {
+		DPRINTK("  Got something to retransmit\n");
+		DPRINTK("  skb %p, seq %lu...%lu, tp->snd_una = %lu\n",skb,
+		    (unsigned long) TCP_SKB_CB(skb)->seq,
+		    (unsigned long) TCP_SKB_CB(skb)->end_seq,
+		    (unsigned long) tp->snd_una);
+		DPRINTK("  sk->sk_wmem_alloc %lu, sk->sk_wmem_queued %lu, "
+		    "sk->sk_sndbuf %lu\n",
+		    (unsigned long) atomic_read(&sk->sk_wmem_alloc),
+		    (unsigned long) sk->sk_wmem_queued,
+		    (unsigned long) sk->sk_sndbuf);
+		DPRINTK("  tp->snd_wnd %lu\n",(unsigned long) tp->snd_wnd);
+		DPRINTK("  tp->packets_out %lu\n",
+		    (unsigned long) tp->packets_out);
+		tcp_retransmit_skb(sk,skb_peek(&sk->sk_write_queue));
+		tcp_reset_xmit_timer(sk,TCP_TIME_RETRANS,tp->rto);
+		/* SEEMS TO WORK */
+		/* ANOMALY - skbs get either split into smaller segments, or
+		   merged to very large sizes, indicating that something's
+		   wrong with our (nearly nonexistent) MSS calculation */
+		return;
+	}
+
+	/*
+	 * Okay, so we have data to send, and nothing waiting for
+	 * retransmission. If the receiver can still take more data
+	 * (window > 0), we send some.
+	 */
+	if (tp->snd_wnd) {
+		DPRINTK("  Can send more data (window %lu)\n",
+		    (unsigned long) tp->snd_wnd);
+		/* is this really correct ??? looks too easy :-) @@@ */
+		tcp_push_one(sk,tcp_current_mss(sk,1));
+		/* NOT TESTED (need to simulate flow where we're stopped by
+		   cwnd ...) */
+		return;
+	}
+
+	/*
+	 * If all else fails, we can still send a window probe ...
+	 */
+	DPRINTK("  Sending window probe\n");
+	tcp_send_probe0(sk);
+	/* SEEMS TO WORK */
+}
+
+/* Run a TCPCP function (only TCPCP_ACTIVATE so far) on a connected TCP/IPv4 socket. */
+static int __tcpcp_fn(struct sock *sk,int fn_code)
+{
+	int error;
+
+	DPRINTK("tcpcp_fn(sk %p,fn_code %d)\n",sk,fn_code);
+
+	if (sk->sk_socket->state != SS_CONNECTED)
+		return -EBADFD;
+	if (sk->sk_family != PF_INET)
+		return -EPFNOSUPPORT;
+	if (sk->sk_type != SOCK_STREAM)
+		return -ESOCKTNOSUPPORT;
+	if (sk->sk_protocol != IPPROTO_TCP)
+		return -EPROTONOSUPPORT;
+
+	/* Unknown function codes are refused. */
+	if (fn_code != TCPCP_ACTIVATE)
+		return -ENOPROTOOPT;
+
+	/* Only connections that pass the idle check get restarted. */
+	error = check_tcp_idle(sk);
+	if (error)
+		return error;
+	tcpcp_restart(sk);
+	return 0;
+}
+
+
+/* ----- Initialization and (module) exit ---------------------------------- */
+
+/* Module init: build the muted AF ops and publish the hooks under the hook lock. */
+static int __init tcpcp_init(void)
+{
+	tcpcp_af_specific = ipv4_specific;	/* start from the real ops */
+	tcpcp_af_specific.queue_xmit = tcpcp_discard_xmit;
+
+	tcpcp_lock_hooks();
+	tcpcp_maxicisize_hook = __tcpcp_maxicisize;
+	tcpcp_getici_hook = __tcpcp_getici;
+	tcpcp_setici_hook = __tcpcp_setici;
+	tcpcp_fn_hook = __tcpcp_fn;
+	tcpcp_unlock_hooks();
+
+	printk(KERN_INFO "tcpcp: ready for mischief (ICI format version "
+	    "%u.%u)\n",TCPCP_CURRENT_ICI_MAJOR,TCPCP_CURRENT_ICI_MINOR);
+	return 0;
+}
+
+/* Module exit: withdraw all hooks (set them to NULL) under the hook lock. */
+static void __exit tcpcp_exit(void)
+{
+	tcpcp_lock_hooks();
+	tcpcp_maxicisize_hook = NULL;
+	tcpcp_getici_hook = NULL;
+	tcpcp_setici_hook = NULL;
+	tcpcp_fn_hook = NULL;
+	tcpcp_unlock_hooks();
+}
+
+
+module_init(tcpcp_init);
+module_exit(tcpcp_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Werner Almesberger <werner@almesberger.net>");
+MODULE_DESCRIPTION("TCP Connection Passing");
--- /dev/null	1969-12-31 21:00:00.000000000 -0300
+++ linux-2.6.11/net/ipv4/tcpcp_hooks.c	2005-04-12 21:59:35.000000000 -0300
@@ -0,0 +1,130 @@
+
+/*
+ * tcpcp_hooks.c - Hooks when using TCP connection passing as a module
+ *
+ * Written 2002,2004 by Werner Almesberger
+ */
+
+
+#include <linux/config.h>
+
+#if defined(CONFIG_TCPCP) || defined(CONFIG_TCPCP_MODULE)
+
+#include <linux/module.h>
+#include <linux/rwsem.h>
+#include <linux/kmod.h>
+#include <linux/tcpcp.h>
+
+
+int sysctl_tcpcp_privileged = 1; /* non-zero: setting TCP_ICI requires CAP_NET_RAW */
+
+EXPORT_SYMBOL(sysctl_tcpcp_privileged);
+
+
+/* ----- Hooks ------------------------------------------------------------- */
+
+/* Entry points into tcpcp.c: set by tcpcp_init, reset to NULL by tcpcp_exit. */
+int (*tcpcp_maxicisize_hook)(struct sock *sk,int *size) = NULL;
+int (*tcpcp_getici_hook)(struct sock *sk,struct tcpcp_ici *user_ici,
+    int *user_size) = NULL;
+int (*tcpcp_setici_hook)(struct sock *sk,const struct tcpcp_ici *user_ici,
+    int size) = NULL;
+int (*tcpcp_fn_hook)(struct sock *sk,int fn_code) = NULL;
+
+EXPORT_SYMBOL(tcpcp_maxicisize_hook);
+EXPORT_SYMBOL(tcpcp_getici_hook);
+EXPORT_SYMBOL(tcpcp_setici_hook);
+EXPORT_SYMBOL(tcpcp_fn_hook);
+
+
+static DECLARE_RWSEM(tcpcp_sem);	/* read: calling a hook; write: changing hooks */
+
+/* Take tcpcp_sem for writing; used by tcpcp.c around changes to the hooks. */
+void tcpcp_lock_hooks(void)
+{
+	down_write(&tcpcp_sem);
+}
+
+EXPORT_SYMBOL(tcpcp_lock_hooks);
+
+/* Release the write lock on tcpcp_sem taken by tcpcp_lock_hooks. */
+void tcpcp_unlock_hooks(void)
+{
+	up_write(&tcpcp_sem);
+}
+
+EXPORT_SYMBOL(tcpcp_unlock_hooks);
+
+
+/* ----- Interface to ipv4/tcp.c ------------------------------------------- */
+
+
+/*
+ * @@@ Known bug: we race with module unloading vs. request_module. E.g. if
+ * tcpcp is loaded, the hook tests may pass, so we don't call request_module.
+ * Then, an unload may happen, before we down tcpcp_sem. The correct solution
+ * is to down tcpcp_sem before testing the hook, and to set some flag that
+ * tells tcpcp.c:tcpcp_init that the hooks are already locked. (Actually, make
+ * this a semaphore we down_trylock, so that concurrent manual loading doesn't
+ * break things.)
+ *
+ * Since this race is pretty obscure, we keep this as a good exercise for
+ * future regression testing with umlsim.
+ */
+
+int tcpcp_maxicisize(struct sock *sk,int *size)
+{
+	int result;
+
+	if (tcpcp_maxicisize_hook == NULL)
+		request_module("tcpcp");	/* hook missing: try to load tcpcp */
+	down_read(&tcpcp_sem);
+	result = tcpcp_maxicisize_hook ?	/* -ENOSYS if still no hook */
+	    tcpcp_maxicisize_hook(sk,size) : -ENOSYS;
+	up_read(&tcpcp_sem);
+	return result;
+}
+
+
+int tcpcp_getici(struct sock *sk,struct tcpcp_ici *user_ici,int *user_size)
+{
+	int result;
+
+	if (tcpcp_getici_hook == NULL)
+		request_module("tcpcp");	/* hook missing: try to load tcpcp */
+	down_read(&tcpcp_sem);
+	result = tcpcp_getici_hook ?		/* -ENOSYS if still no hook */
+	    tcpcp_getici_hook(sk,user_ici,user_size) : -ENOSYS;
+	up_read(&tcpcp_sem);
+	return result;
+}
+
+
+int tcpcp_setici(struct sock *sk,const struct tcpcp_ici *user_ici,int size)
+{
+	int result;
+
+	if (tcpcp_setici_hook == NULL)
+		request_module("tcpcp");	/* hook missing: try to load tcpcp */
+	down_read(&tcpcp_sem);
+	result = tcpcp_setici_hook ?		/* -ENOSYS if still no hook */
+	    tcpcp_setici_hook(sk,user_ici,size) : -ENOSYS;
+	up_read(&tcpcp_sem);
+	return result;
+}
+
+
+int tcpcp_fn(struct sock *sk,int fn_code)
+{
+	int result;
+
+	if (tcpcp_fn_hook == NULL)
+		request_module("tcpcp");	/* hook missing: try to load tcpcp */
+	down_read(&tcpcp_sem);
+	result = tcpcp_fn_hook ?		/* -ENOSYS if still no hook */
+	    tcpcp_fn_hook(sk,fn_code) : -ENOSYS;
+	up_read(&tcpcp_sem);
+	return result;
+}
+
+#endif /* defined(CONFIG_TCPCP) || defined(CONFIG_TCPCP_MODULE) */

