Socket System Call

 

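How to trace the system call?

Disassemble the C library's socket() wrapper with gdb. The wrapper loads
0x66 (decimal 102, the sys_call_table index of sys_socketcall) into %eax,
the sub-call number 1 (SYS_SOCKET) into %ebx and the address of the caller's
arguments on the stack into %ecx, and then traps into the kernel with
int $0x80.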

alokg@esus:~/www/518$ gdb a.out
gdb: Symbol `emacs_ctlx_keymap' has different size in shared object, consider re-linking
GNU gdb 2002-04-01-cvs
Copyright 2002 Free Software Foundation, Inc.
GDB is free software, covered by the GNU General Public License, and you are
welcome to change it and/or distribute copies of it under certain conditions.
Type "show copying" to see the conditions.
There is absolutely no warranty for GDB. Type "show warranty" for details.
This GDB was configured as "i386-linux"...
(gdb) disassemble socket
Dump of assembler code for function socket:
0x804e780 <socket>: mov %ebx,%edx
0x804e782 <socket+2>: mov $0x66,%eax
0x804e787 <socket+7>: mov $0x1,%ebx
0x804e78c <socket+12>: lea 0x4(%esp,1),%ecx
0x804e790 <socket+16>: int $0x80
0x804e792 <socket+18>: mov %edx,%ebx
0x804e794 <socket+20>: cmp $0xffffff83,%eax
0x804e797 <socket+23>: jae 0x8050330 <__syscall_error>
0x804e79d <socket+29>: ret
0x804e79e <socket+30>: nop
0x804e79f <socket+31>: nop
End of assembler dump.

.long SYMBOL_NAME(sys_statfs)
.long SYMBOL_NAME(sys_fstatfs) /* 100 */
.long SYMBOL_NAME(sys_ioperm)
.long SYMBOL_NAME(sys_socketcall)   /* <-- This is entry 102 */
.long SYMBOL_NAME(sys_syslog)
.long SYMBOL_NAME(sys_setitimer)
.long SYMBOL_NAME(sys_getitimer) /* 105 */

 

Entry point for system calls: arch/i386/kernel/entry.S

/*
 * Excerpt of the system call entry path. %eax holds the system call
 * number on entry; it is used below as the index into sys_call_table.
 */
ENTRY(system_call)
        pushl %eax                      # save orig_eax
        /* NOTE(review): SAVE_ALL is defined elsewhere; presumably it pushes the user registers - confirm */
        SAVE_ALL
        /* NOTE(review): GET_CURRENT presumably loads the current task pointer into %ebx (it is used as the base of tsk_ptrace below) - confirm */
        GET_CURRENT(%ebx)
        /* unsigned compare: any number >= NR_syscalls is sent to badsys */
        cmpl $(NR_syscalls),%eax
        jae badsys
        /* if bit 0x02 is set in the task's ptrace field, take the tracesys path instead */
        testb $0x02,tsk_ptrace(%ebx)    # PT_TRACESYS
        jne tracesys
        /* indirect call through the 4-byte table slot selected by %eax */
        call *SYMBOL_NAME(sys_call_table)(,%eax,4)
        movl %eax,EAX(%esp)             # save the return value

.....
.....
.data
ENTRY(sys_call_table)
.....
        .long SYMBOL_NAME(sys_fstatfs)          /* 100 */
        .long SYMBOL_NAME(sys_ioperm)
        .long SYMBOL_NAME(sys_socketcall)       /* 102 */
.....
 

include/linux/net.h

/*
 * Sub-call numbers for the socketcall multiplexer. sys_socketcall()
 * receives one of these values as its 'call' argument, accepts the range
 * 1..SYS_RECVMSG, and dispatches to the handler named in the comment on
 * each line.
 */
#define SYS_SOCKET      1               /* sys_socket(2)             */
#define SYS_BIND        2               /* sys_bind(2)               */
#define SYS_CONNECT     3               /* sys_connect(2)            */
#define SYS_LISTEN      4               /* sys_listen(2)             */
#define SYS_ACCEPT      5               /* sys_accept(2)             */
#define SYS_GETSOCKNAME 6               /* sys_getsockname(2)        */
#define SYS_GETPEERNAME 7               /* sys_getpeername(2)        */
#define SYS_SOCKETPAIR  8               /* sys_socketpair(2)         */
#define SYS_SEND        9               /* sys_send(2)               */
#define SYS_RECV        10              /* sys_recv(2)               */
#define SYS_SENDTO      11              /* sys_sendto(2)             */
#define SYS_RECVFROM    12              /* sys_recvfrom(2)           */
#define SYS_SHUTDOWN    13              /* sys_shutdown(2)           */
#define SYS_SETSOCKOPT  14              /* sys_setsockopt(2)         */
#define SYS_GETSOCKOPT  15              /* sys_getsockopt(2)         */
#define SYS_SENDMSG     16              /* sys_sendmsg(2)            */
#define SYS_RECVMSG     17              /* sys_recvmsg(2)            */

net/socket.c: sys_socketcall()

/*
 *      System call vectors.
 *
 *      Argument checking cleaned up. Saved 20% in size.
 *  This function doesn't need to set the kernel lock because
 *  it is set by the callees.
 */

asmlinkage long sys_socketcall(int call, unsigned long *args)
{
        unsigned long a[6];
        unsigned long a0,a1;
        int err;

        if(call<1||call>SYS_RECVMSG)
                return -EINVAL;

        /* copy_from_user should be SMP safe. */
        if (copy_from_user(a, args, nargs[call]))
                return -EFAULT;
               
        a0=a[0];
        a1=a[1];
       
        switch(call)
        {
                case SYS_SOCKET:
                        err = sys_socket(a0,a1,a[2]);
                        break;
                case SYS_BIND:
                        err = sys_bind(a0,(struct sockaddr *)a1, a[2]);
                        break;
                case SYS_CONNECT:
                        err = sys_connect(a0, (struct sockaddr *)a1, a[2]);
                        break;
                case SYS_LISTEN:
                        err = sys_listen(a0,a1);
                        break;
                case SYS_ACCEPT:
                        err = sys_accept(a0,(struct sockaddr *)a1, (int *)a[2]);
                        break;
                case SYS_GETSOCKNAME:
                        err = sys_getsockname(a0,(struct sockaddr *)a1, (int *)a[2]);
                        break;
                case SYS_GETPEERNAME:
                        err = sys_getpeername(a0, (struct sockaddr *)a1, (int *)a[2]);
                        break;
                case SYS_SOCKETPAIR:
                        err = sys_socketpair(a0,a1, a[2], (int *)a[3]);
                        break;
                case SYS_SEND:
                        err = sys_send(a0, (void *)a1, a[2], a[3]);
                        break;
                case SYS_SENDTO:
                        err = sys_sendto(a0,(void *)a1, a[2], a[3],
                                         (struct sockaddr *)a[4], a[5]);
                        break;
                case SYS_RECV:
                        err = sys_recv(a0, (void *)a1, a[2], a[3]);
                        break;
                case SYS_RECVFROM:
                        err = sys_recvfrom(a0, (void *)a1, a[2], a[3],
                                           (struct sockaddr *)a[4], (int *)a[5]);
                        break;
                case SYS_SHUTDOWN:
                        err = sys_shutdown(a0,a1);
                        break;
                case SYS_SETSOCKOPT:
                        err = sys_setsockopt(a0, a1, a[2], (char *)a[3], a[4]);
                        break;
                case SYS_GETSOCKOPT:
                        err = sys_getsockopt(a0, a1, a[2], (char *)a[3], (int *)a[4]);
                        break;
                case SYS_SENDMSG:
                        err = sys_sendmsg(a0, (struct msghdr *) a1, a[2]);
                        break;
                case SYS_RECVMSG:
                        err = sys_recvmsg(a0, (struct msghdr *) a1, a[2]);
                        break;
                default:
                        err = -EINVAL;
                        break;
        }
        return err;
}


net/socket.c: sys_socket()

/*
 *      Create a socket of the given family/type/protocol and bind it to a
 *      new file descriptor of the calling process.
 *
 *      Returns the descriptor from sock_map_fd() on success, or the
 *      negative value reported by sock_create() or sock_map_fd().
 */
asmlinkage long sys_socket(int family, int type, int protocol)
{
        struct socket *sock;
        int result;

        result = sock_create(family, type, protocol, &sock);
        if (result < 0)
                return result;

        result = sock_map_fd(sock);
        if (result < 0) {
                /* No descriptor could be attached: drop the new socket. */
                sock_release(sock);
                return result;
        }

        /* It may be already another descriptor 8) Not kernel problem. */
        return result;
}

/*
 *      Create a socket for the given family, type and protocol.
 *
 *      On success the new socket is stored in *res and the (non-negative)
 *      value returned by the family's create() hook is returned.
 *      Failures visible in this excerpt:
 *        -EINVAL  family is outside 0..NPROTO-1, or no handler is
 *                 registered in net_families[] for it
 *        -ENFILE  sock_alloc() could not provide a socket
 *        < 0      whatever the family's create() hook returned; the
 *                 socket is released again before returning
 *
 *      Every exit through 'out' drops the lock taken by
 *      net_family_read_lock().
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
        int i;
        struct socket *sock;

        /*
         *      Check that the protocol family is in range
         */
        if(family<0 || family>=NPROTO)
                return -EINVAL;
/* (lines elided in this excerpt) */
.....
        net_family_read_lock();
        /* No handler registered for this family. */
        if (net_families[family] == NULL) {
                i = -EINVAL;
                goto out;
        }

/*
 *      Allocate the socket and allow the family to set things up. if
 *      the protocol is 0, the family is instructed to select an appropriate
 *      default.
 */

        if (!(sock = sock_alloc()))
        {
                printk(KERN_WARNING "socket: no more sockets\n");
                i = -ENFILE;            /* Not exactly a match, but its the
                                           closest posix thing */
                goto out;
        }

        sock->type  = type;

        /* Hand over to the family-specific constructor; undo the allocation if it fails. */
        if ((i = net_families[family]->create(sock, protocol)) < 0)
        {
                sock_release(sock);
                goto out;
        }

        *res = sock;

out:
        net_family_read_unlock();
        return i;
}

/**
 *      sock_alloc      -       allocate a socket
 *     
 *      Allocate a new inode and socket object. The two are bound together
 *      and initialised. The socket is then returned. If we are out of inodes
 *      NULL is returned.
 */

struct socket *sock_alloc(void)
{
        struct inode * inode;
        struct socket * sock;

        inode = get_empty_inode();
        if (!inode)
                return NULL;

        sock = socki_lookup(inode);

        inode->i_mode = S_IFSOCK|S_IRWXUGO;
        inode->i_sock = 1;
        inode->i_uid = current->fsuid;
        inode->i_gid = current->fsgid;

        sock->inode = inode;
        init_waitqueue_head(&sock->wait);
        sock->fasync_list = NULL;
        sock->state = SS_UNCONNECTED;
        sock->flags = 0;
        sock->ops = NULL;
        sock->sk = NULL;
        sock->file = NULL;

        sockets_in_use[smp_processor_id()].counter++;
        return sock;
}

/* Return the address of the socket stored in the inode's u.socket_i member. */
extern __inline__ struct socket *socki_lookup(struct inode *inode)
{
        struct socket *sock = &inode->u.socket_i;

        return sock;
}

/*
 *      Obtains the first available file descriptor and sets it up for use.
 *
 *      This functions creates file structure and maps it to fd space
 *      of current process. On success it returns file descriptor
 *      and file struct implicitly stored in sock->file.
 *      Note that another thread may close file descriptor before we return
 *      from this function. We use the fact that now we do not refer
 *      to socket after mapping. If one day we will need it, this
 *      function will inincrement ref. count on file by 1.
 *
 *      In any case returned fd MAY BE not valid!
 *      This race condition is inavoidable
 *      with shared fd spaces, we cannot solve is inside kernel,
 *      but we take care of internal coherence yet.
 */

static int sock_map_fd(struct socket *sock)
{
        int fd;

        /*
         *      Find a file descriptor suitable for return to the user.
         */

        fd = get_unused_fd();
        if (fd >= 0) {
                struct file *file = get_empty_filp();

                if (!file) {
                        put_unused_fd(fd);
                        fd = -ENFILE;
                        goto out;
                }

                file->f_dentry = d_alloc_root(sock->inode);
                /* MOUNT_REWRITE: set to sockfs internal vfsmnt */
                file->f_vfsmnt = NULL;
                if (!file->f_dentry) {
                        put_filp(file);
                        put_unused_fd(fd);
                        fd = -ENOMEM;
                        goto out;
                }

                sock->file = file;
                file->f_op = &socket_file_ops;
                file->f_mode = 3;
                file->f_flags = O_RDWR;
                file->f_pos = 0;
                fd_install(fd, file);
        }

out:
        return fd;
}

/*
 *      Socket files have a set of 'special' operations as well as the generic file ones. Th
ese don't appear
 *      in the operation structures but are done directly via the socketcall() multiplexor.
 */

static struct file_operations socket_file_ops = {
        llseek:         sock_lseek,
        read:           sock_read,
        write:          sock_write,
        poll:           sock_poll,
        ioctl:          sock_ioctl,
        mmap:           sock_mmap,
        open:           sock_no_open,   /* special open code to disallow open via /proc */
        release:        sock_close,
        fasync:         sock_fasync,
        readv:          sock_readv,
        writev:         sock_writev
};
 

 

List of Linux Kernel Networking Functions

destroy_sock() - net/ipv4/af_inet.c
  deletes any timers
  calls any protocols specific destroy functions
  frees the socket's queues
  frees the socket structure itself

fib_lookup() - include/net/ip_fib.h
  calls tb_lookup() [= fn_hash_lookup()] on local and main tables
  returns route or unreachable error

fn_hash_lookup() - net/ipv4/fib_hash.c
  looks up and returns route to an address

inet_create() - net/ipv4/af_inet.c
  calls sk_alloc() to get memory for sock
  initializes sock structure:
    sets proto structure to appropriate values for TCP or UDP
    calls sock_init_data()
    sets family,protocol,etc. variables
  calls the protocol init function (if any)

inet_release() - net/ipv4/af_inet.c
  changes socket state to disconnecting
  calls ip_mc_drop_socket to leave multicast group (if necessary)
  sets owning socket's data member to NULL
  calls sk->prot->close() [=TCP/UDP_close()]

ip_route_connect() - include/net/route.h
  calls ip_route_output() to get a destination address
  returns if the call works or generates an error
  otherwise clears the route pointer and tries again

ip_route_output() - net/ipv4/route.c
  calculates hash value for address
  runs through table (starting at hash) to match addresses and TOS
  if there is a match, updates stats and returns route entry
  else calls ip_route_output_slow()

ip_route_output_slow() - net/ipv4/route.c
  if source address is known, looks up output device
  if destination address is unknown, sets up loopback
  calls fib_lookup() to find route in FIB
  allocates memory for a new routing table entry
  initializes table entry with source, destination, TOS, output device,
      flags
  calls rt_set_nexthop() to find next destination
  returns rt_intern_hash(), which installs route in routing table

rt_intern_hash() - net/ipv4/route.c
  loops through rt_hash_table (starting at hash value)
  if keys match, puts rtable entry in front bucket
  else puts rtable entry into hash table at hash

sock_close() - net/socket.c
  checks if socket exists (could be null)
  calls sock_fasync() to remove socket from async list
  calls sock_release()

sock_create() - net/socket.c
  checks parameters
  calls sock_alloc() to get an available inode for the socket and
      initialize it
  sets socket->type (to SOCK_STREAM, SOCK_DGRAM...)
  calls net_family->create() [= inet_create()] to build sock structure
  returns established socket

sock_init_data() - net/core/sock.c
  initializes all generic sock values

sock_read() - net/socket.c
  sets up message headers
  returns sock_recvmsg() with result of read

sock_recvmsg() - net/socket.c
  reads socket management packet (scm) or packet by calling
      sock->ops[inet]->recvmsg()

sock_release() - net/socket.c
  changes state to disconnecting
  calls sock->ops->release() [= inet_release()]
  calls iput() to remove socket from inode list

sys_socket() - net/socket.c
  calls sock_create() to get and initialize socket
  calls get_fd() to assign an fd to the socket
  sets socket->file to fcheck() (pointer to file)
  calls sock_release() if anything fails

tcp_close() - net/ipv4/tcp.c
  checks for errors
  pops and discards all packets off incoming queue
  sends messages to destination to close connection (if required)

tcp_connect() - net/ipv4/tcp_output.c
  completes connection packet with appropriate bits and window sizes set
  puts packet on socket output queue
  calls tcp_transmit_skb() to send packet, initiating TCP connection

tcp_v4_connect() - net/ipv4/tcp_ipv4.c
  checks for errors
  calls ip_route_connect() to find route to destination
  creates connection packet
  calls tcp_connect() to send packet

udp_close() - net/ipv4/udp.c
  calls udp_v4_unhash() to remove socket from socket list
  calls destroy_sock()

udp_connect() - net/ipv4/udp.c
  calls ip_route_connect() to find route to destination
  updates socket with source and destination addresses and ports
  changes socket state to established
  saves the destination route in sock->dst_cache