Socket API

 

A network programmer gets to use the following API :

An example client-server program shows the actual usage of the API in detail. Figure 1 shows the data flow between the client and the server.

Please refer to the server and client code examples.

Figure 1: Client - Server Connection Flow Diagram

 

Figure 2. Server Connection Queue

Socket Options

Setting and getting options for a socket is performed through two functions, setsockopt() and getsockopt(). These functions provide a single interface for getting and setting a variety of options using a number of different structures. The socket option prototypes are given below:

#include <sys/types.h>
#include <sys/socket.h>

/* Reads an option: the value is stored through optval; optlength points to
 * the size of that buffer. */
int getsockopt( int socket, int level, int optionname, void *optval, socklen_t *optlength );

/* Sets an option: optval is only read by the call, so it is declared const
 * (this matches the POSIX prototype in <sys/socket.h>). */
int setsockopt( int socket, int level, int optionname, const void *optval, socklen_t optlength );

 

The level refers to the protocol layer to which this option will be applied. The option name is given by optionname. Various options exist depending on the protocol of interest. The optval argument specifies the value to be set or, in a GET request, the location in which to store the option. Finally, optlength defines the length of the structure passed in optval. As there are a number of different structures that can be used to set or get options, this parameter gives the length so that the call does not overrun the buffer.

LevelType

Level

Description

Option Prefix

SOL_SOCKET

Sockets layer

SO_

IPPROTO_TCP

TCP Transport layer

TCP_

IPPROTO_IP

IP Network layer

IP_


Each option level has a number of options that can be manipulated.

 

Sockets Layer Options

 

The Sockets layer options are those defined within the context of level SOL_SOCKET and focus on the Sockets API layer. The typical options for the Sockets layer are defined below:

Option Name

Description

Get or Set

SO_BROADCAST

Permits transmit of broadcast datagrams

both

SO_DEBUG

Enables debug logging

both

SO_DONTROUTE

Enables bypass of routing tables

both

SO_ERROR

Retrieve the current socket error

get

SO_LINGER

Enables linger on close if data present

both

SO_KEEPALIVE

Enables TCP Keepalive probes

both

SO_RCVBUF

Modifies the size of the socket receive buffer

both

SO_SNDBUF

Modifies the size of the socket send buffer

both

SO_RCVLOWAT

Sets the minimum byte count for input

both

SO_SNDLOWAT

Sets the minimum byte count for output

both

SO_SNDTIMEO

Sets the timeout value for output

both

SO_RCVTIMEO

Sets the timeout value for input

both

SO_REUSEADDR

Enables local address reuse

both

SO_TYPE

Retrieves the socket type

get

 

 

IP Layer Options


These options are those defined within the context of level IPPROTO_IP and focus on the IP layer.

Option Name

Description

Get or Set

IP_HDRINCL IP header precedes data in buffer both
IP_TOS Modifies the IP Type-Of-Service header field both
IP_TTL Modifies the IP Time-To-Live header field both
IP_ADD_MEMBERSHIP Join a multicast group set
IP_DROP_MEMBERSHIP Leave a multicast group set
IP_MULTICAST_IF Modify the outgoing multicast interface both
IP_MULTICAST_TTL Modify the outgoing multicast TTL both
IP_MULTICAST_LOOP Enable/Disable loopback of outgoing datagrams both


Example : IP_TOS Option

The IP_TOS option permits an application to change the Type of Service (TOS) field in the IP header of a socket. The TOS field is used to specify service precedence within networks. The TOS field permits segmenting traffic using quality of service (QOS) parameters.




Figure: IP TOS fields


The Delay, Throughput, and Reliability bits indicate a requested quality of service.

The IP_TOS field is used in LAN environments to segment traffic based upon quality of service needs. Quality of service APIs manipulate this field.

 

Accessor / Mutator Function Implementation

In the kernel code, the implementation of the set-option function can be found in net/ipv4/ip_sockglue.c. The beginning of the function is shown below:

/*
 * Opening part of ip_setsockopt(): checks the option level and, for a set
 * of integer-valued options, fetches the argument from user space into val.
 * The listing is an excerpt; the rest of the function body is not shown.
 */
int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
{
int val=0,err;

/* Anything other than the IP level is rejected. */
if (level != SOL_IP)
return -ENOPROTOOPT;

/*
 * The options named in the bitmask, plus the two multicast options that
 * are compared explicitly, get their argument read into val.
 * NOTE(review): 1<<optname is only well defined when optname is
 * non-negative and smaller than the bit width of int -- confirm that
 * optname is bounded before this test is reached.
 */
if (((1<<optname) & ((1<<IP_PKTINFO) | (1<<IP_RECVTTL) |
(1<<IP_RECVOPTS) | (1<<IP_RECVTOS) |
(1<<IP_RETOPTS) | (1<<IP_TOS) |
(1<<IP_TTL) | (1<<IP_HDRINCL) |
(1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) |
(1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND))) ||
optname == IP_MULTICAST_TTL ||
optname == IP_MULTICAST_LOOP) {
if (optlen >= sizeof(int)) {
/* The caller passed at least an int: read a whole int. */
if (get_user(val, (int *) optval))
return -EFAULT;
} else if (optlen >= sizeof(char)) {
/* Shorter argument: read a single byte and widen it to int. */
unsigned char ucval;

if (get_user(ucval, (unsigned char *) optval))
return -EFAULT;
val = (int) ucval;
}
/* If optlen is smaller than one byte, val keeps its initial value of 0. */
}
/* (excerpt ends here) */


The get function, along with the various option names it handles, is shown below:

int ip_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
{
int val;
int len;

if(level!=SOL_IP)
return -EOPNOTSUPP;

#ifdef CONFIG_IP_MROUTE
if(optname>=MRT_BASE && optname <=MRT_BASE+10)
{
return ip_mroute_getsockopt(sk,optname,optval,optlen);
}
#endif

if(get_user(len,optlen))
return -EFAULT;
if(len < 0)
return -EINVAL;

lock_sock(sk);

switch(optname) {
case IP_OPTIONS:
{
unsigned char optbuf[sizeof(struct ip_options)+40];
struct ip_options * opt = (struct ip_options*)optbuf;
opt->optlen = 0;
if (sk->protinfo.af_inet.opt)
memcpy(optbuf, sk->protinfo.af_inet.opt,
sizeof(struct ip_options)+
sk->protinfo.af_inet.opt->optlen);
release_sock(sk);

if (opt->optlen == 0)
return put_user(0, optlen);

ip_options_undo(opt);

len = min_t(unsigned int, len, opt->optlen);
if(put_user(len, optlen))
return -EFAULT;
if(copy_to_user(optval, opt->__data, len))
return -EFAULT;
return 0;
}
case IP_PKTINFO:
val = (sk->protinfo.af_inet.cmsg_flags & IP_CMSG_PKTINFO) != 0;
break;
case IP_RECVTTL:
val = (sk->protinfo.af_inet.cmsg_flags & IP_CMSG_TTL) != 0;
break;
case IP_RECVTOS:
val = (sk->protinfo.af_inet.cmsg_flags & IP_CMSG_TOS) != 0;
break;
case IP_RECVOPTS:
val = (sk->protinfo.af_inet.cmsg_flags & IP_CMSG_RECVOPTS) != 0;
break;
case IP_RETOPTS:
val = (sk->protinfo.af_inet.cmsg_flags & IP_CMSG_RETOPTS) != 0;
break;
case IP_TOS:
val=sk->protinfo.af_inet.tos;
break;
case IP_TTL:
val=sk->protinfo.af_inet.ttl;
break;
case IP_HDRINCL:
val=sk->protinfo.af_inet.hdrincl;
break;
case IP_MTU_DISCOVER:
val=sk->protinfo.af_inet.pmtudisc;
break;
case IP_MTU:
{
struct dst_entry *dst;
val = 0;
dst = sk_dst_get(sk);
if (dst) {
val = dst->pmtu;
dst_release(dst);
}
if (!val) {
release_sock(sk);
return -ENOTCONN;
}
break;
}
case IP_RECVERR:
val=sk->protinfo.af_inet.recverr;
break;
case IP_MULTICAST_TTL:
val=sk->protinfo.af_inet.mc_ttl;
break;
case IP_MULTICAST_LOOP:
val=sk->protinfo.af_inet.mc_loop;
break;
case IP_MULTICAST_IF:
{
struct in_addr addr;
len = min_t(unsigned int, len, sizeof(struct in_addr));
addr.s_addr = sk->protinfo.af_inet.mc_addr;
release_sock(sk);

if(put_user(len, optlen))
return -EFAULT;
if(copy_to_user((void *)optval, &addr, len))
return -EFAULT;
return 0;
}
case IP_PKTOPTIONS:
{
struct msghdr msg;

release_sock(sk);

if (sk->type != SOCK_STREAM)
return -ENOPROTOOPT;

msg.msg_control = optval;
msg.msg_controllen = len;
msg.msg_flags = 0;

if (sk->protinfo.af_inet.cmsg_flags&IP_CMSG_PKTINFO) {
struct in_pktinfo info;

info.ipi_addr.s_addr = sk->rcv_saddr;
info.ipi_spec_dst.s_addr = sk->rcv_saddr;
info.ipi_ifindex = sk->protinfo.af_inet.mc_index;
put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
}
if (sk->protinfo.af_inet.cmsg_flags&IP_CMSG_TTL) {
int hlim = sk->protinfo.af_inet.mc_ttl;
put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
}
len -= msg.msg_controllen;
return put_user(len, optlen);
}
case IP_FREEBIND:
val = sk->protinfo.af_inet.freebind;
break;
default:
#ifdef CONFIG_NETFILTER
val = nf_getsockopt(sk, PF_INET, optname, optval,
&len);
release_sock(sk);
if (val >= 0)
val = put_user(len, optlen);
return val;
#else
release_sock(sk);
return -ENOPROTOOPT;
#endif
}
release_sock(sk);

if (len < sizeof(int) && len > 0 && val>=0 && val<255) {
unsigned char ucval = (unsigned char)val;
len = 1;
if(put_user(len, optlen))
return -EFAULT;
if(copy_to_user(optval,&ucval,1))
return -EFAULT;
} else {
len = min_t(unsigned int, sizeof(int), len);
if(put_user(len, optlen))
return -EFAULT;
if(copy_to_user(optval,&val,len))
return -EFAULT;
}
return 0;
}