Magellan Linux

Annotation of /alx-src/tags/kernel26-2.6.12-alx-r9/net/socket.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 630 - (hide annotations) (download)
Wed Mar 4 11:03:09 2009 UTC (15 years, 3 months ago) by niro
File MIME type: text/plain
File size: 50026 byte(s)
Tag kernel26-2.6.12-alx-r9
1 niro 628 /*
2     * NET An implementation of the SOCKET network access protocol.
3     *
4     * Version: @(#)socket.c 1.1.93 18/02/95
5     *
6     * Authors: Orest Zborowski, <obz@Kodak.COM>
7     * Ross Biro
8     * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
9     *
10     * Fixes:
11     * Anonymous : NOTSOCK/BADF cleanup. Error fix in
12     * shutdown()
13     * Alan Cox : verify_area() fixes
14     * Alan Cox : Removed DDI
15     * Jonathan Kamens : SOCK_DGRAM reconnect bug
16     * Alan Cox : Moved a load of checks to the very
17     * top level.
18     * Alan Cox : Move address structures to/from user
19     * mode above the protocol layers.
20     * Rob Janssen : Allow 0 length sends.
21     * Alan Cox : Asynchronous I/O support (cribbed from the
22     * tty drivers).
23     * Niibe Yutaka : Asynchronous I/O for writes (4.4BSD style)
24     * Jeff Uphoff : Made max number of sockets command-line
25     * configurable.
26     * Matti Aarnio : Made the number of sockets dynamic,
27     * to be allocated when needed, and mr.
28     * Uphoff's max is used as max to be
29     * allowed to allocate.
30     * Linus : Argh. removed all the socket allocation
31     * altogether: it's in the inode now.
32     * Alan Cox : Made sock_alloc()/sock_release() public
33     * for NetROM and future kernel nfsd type
34     * stuff.
35     * Alan Cox : sendmsg/recvmsg basics.
36     * Tom Dyas : Export net symbols.
37     * Marcin Dalecki : Fixed problems with CONFIG_NET="n".
38     * Alan Cox : Added thread locking to sys_* calls
39     * for sockets. May have errors at the
40     * moment.
41     * Kevin Buhr : Fixed the dumb errors in the above.
42     * Andi Kleen : Some small cleanups, optimizations,
43     * and fixed a copy_from_user() bug.
44     * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0)
45     * Tigran Aivazian : Made listen(2) backlog sanity checks
46     * protocol-independent
47     *
48     *
49     * This program is free software; you can redistribute it and/or
50     * modify it under the terms of the GNU General Public License
51     * as published by the Free Software Foundation; either version
52     * 2 of the License, or (at your option) any later version.
53     *
54     *
55     * This module is effectively the top level interface to the BSD socket
56     * paradigm.
57     *
58     * Based upon Swansea University Computer Society NET3.039
59     */
60    
61     #include <linux/config.h>
62     #include <linux/mm.h>
63     #include <linux/smp_lock.h>
64     #include <linux/socket.h>
65     #include <linux/file.h>
66     #include <linux/net.h>
67     #include <linux/interrupt.h>
68     #include <linux/netdevice.h>
69     #include <linux/proc_fs.h>
70     #include <linux/seq_file.h>
71     #include <linux/wanrouter.h>
72     #include <linux/if_bridge.h>
73     #include <linux/init.h>
74     #include <linux/poll.h>
75     #include <linux/cache.h>
76     #include <linux/module.h>
77     #include <linux/highmem.h>
78     #include <linux/divert.h>
79     #include <linux/mount.h>
80     #include <linux/security.h>
81     #include <linux/syscalls.h>
82     #include <linux/compat.h>
83     #include <linux/kmod.h>
84    
85     #ifdef CONFIG_NET_RADIO
86     #include <linux/wireless.h> /* Note : will define WIRELESS_EXT */
87     #endif /* CONFIG_NET_RADIO */
88    
89     #include <asm/uaccess.h>
90     #include <asm/unistd.h>
91    
92     #include <net/compat.h>
93    
94     #include <net/sock.h>
95     #include <linux/netfilter.h>
96    
97     static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
98     static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf,
99     size_t size, loff_t pos);
100     static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf,
101     size_t size, loff_t pos);
102     static int sock_mmap(struct file *file, struct vm_area_struct * vma);
103    
104     static int sock_close(struct inode *inode, struct file *file);
105     static unsigned int sock_poll(struct file *file,
106     struct poll_table_struct *wait);
107     static long sock_ioctl(struct file *file,
108     unsigned int cmd, unsigned long arg);
109     static int sock_fasync(int fd, struct file *filp, int on);
110     static ssize_t sock_readv(struct file *file, const struct iovec *vector,
111     unsigned long count, loff_t *ppos);
112     static ssize_t sock_writev(struct file *file, const struct iovec *vector,
113     unsigned long count, loff_t *ppos);
114     static ssize_t sock_sendpage(struct file *file, struct page *page,
115     int offset, size_t size, loff_t *ppos, int more);
116    
117    
118     /*
119     * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
120     * in the operation structures but are done directly via the socketcall() multiplexor.
121     */
122    
123     static struct file_operations socket_file_ops = {
124     .owner = THIS_MODULE,
125     .llseek = no_llseek,
126     .aio_read = sock_aio_read,
127     .aio_write = sock_aio_write,
128     .poll = sock_poll,
129     .unlocked_ioctl = sock_ioctl,
130     .mmap = sock_mmap,
131     .open = sock_no_open, /* special open code to disallow open via /proc */
132     .release = sock_close,
133     .fasync = sock_fasync,
134     .readv = sock_readv,
135     .writev = sock_writev,
136     .sendpage = sock_sendpage
137     };
138    
139     /*
140     * The protocol list. Each protocol is registered in here.
141     */
142    
143     static struct net_proto_family *net_families[NPROTO];
144    
145     #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
146     static atomic_t net_family_lockct = ATOMIC_INIT(0);
147     static DEFINE_SPINLOCK(net_family_lock);
148    
149     /* The strategy is: modifications net_family vector are short, do not
150     sleep and veeery rare, but read access should be free of any exclusive
151     locks.
152     */
153    
154     static void net_family_write_lock(void)
155     {
156     spin_lock(&net_family_lock);
157     while (atomic_read(&net_family_lockct) != 0) {
158     spin_unlock(&net_family_lock);
159    
160     yield();
161    
162     spin_lock(&net_family_lock);
163     }
164     }
165    
166     static __inline__ void net_family_write_unlock(void)
167     {
168     spin_unlock(&net_family_lock);
169     }
170    
171     static __inline__ void net_family_read_lock(void)
172     {
173     atomic_inc(&net_family_lockct);
174     spin_unlock_wait(&net_family_lock);
175     }
176    
177     static __inline__ void net_family_read_unlock(void)
178     {
179     atomic_dec(&net_family_lockct);
180     }
181    
182     #else
183     #define net_family_write_lock() do { } while(0)
184     #define net_family_write_unlock() do { } while(0)
185     #define net_family_read_lock() do { } while(0)
186     #define net_family_read_unlock() do { } while(0)
187     #endif
188    
189    
190     /*
191     * Statistics counters of the socket lists
192     */
193    
194     static DEFINE_PER_CPU(int, sockets_in_use) = 0;
195    
196     /*
197     * Support routines. Move socket addresses back and forth across the kernel/user
198     * divide and look after the messy bits.
199     */
200    
201     #define MAX_SOCK_ADDR 128 /* 108 for Unix domain -
202     16 for IP, 16 for IPX,
203     24 for IPv6,
204     about 80 for AX.25
205     must be at least one bigger than
206     the AF_UNIX size (see net/unix/af_unix.c
207     :unix_mkname()).
208     */
209    
210     /**
211     * move_addr_to_kernel - copy a socket address into kernel space
212     * @uaddr: Address in user space
213     * @kaddr: Address in kernel space
214     * @ulen: Length in user space
215     *
216     * The address is copied into kernel space. If the provided address is
217     * too long an error code of -EINVAL is returned. If the copy gives
218     * invalid addresses -EFAULT is returned. On a success 0 is returned.
219     */
220    
221     int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr)
222     {
223     if(ulen<0||ulen>MAX_SOCK_ADDR)
224     return -EINVAL;
225     if(ulen==0)
226     return 0;
227     if(copy_from_user(kaddr,uaddr,ulen))
228     return -EFAULT;
229     return 0;
230     }
231    
232     /**
233     * move_addr_to_user - copy an address to user space
234     * @kaddr: kernel space address
235     * @klen: length of address in kernel
236     * @uaddr: user space address
237     * @ulen: pointer to user length field
238     *
239     * The value pointed to by ulen on entry is the buffer length available.
240     * This is overwritten with the buffer space used. -EINVAL is returned
241     * if an overlong buffer is specified or a negative buffer size. -EFAULT
242     * is returned if either the buffer or the length field are not
243     * accessible.
244     * After copying the data up to the limit the user specifies, the true
245     * length of the data is written over the length limit the user
246     * specified. Zero is returned for a success.
247     */
248    
249     int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen)
250     {
251     int err;
252     int len;
253    
254     if((err=get_user(len, ulen)))
255     return err;
256     if(len>klen)
257     len=klen;
258     if(len<0 || len> MAX_SOCK_ADDR)
259     return -EINVAL;
260     if(len)
261     {
262     if(copy_to_user(uaddr,kaddr,len))
263     return -EFAULT;
264     }
265     /*
266     * "fromlen shall refer to the value before truncation.."
267     * 1003.1g
268     */
269     return __put_user(klen, ulen);
270     }
271    
272     #define SOCKFS_MAGIC 0x534F434B
273    
274     static kmem_cache_t * sock_inode_cachep;
275    
276     static struct inode *sock_alloc_inode(struct super_block *sb)
277     {
278     struct socket_alloc *ei;
279     ei = (struct socket_alloc *)kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL);
280     if (!ei)
281     return NULL;
282     init_waitqueue_head(&ei->socket.wait);
283    
284     ei->socket.fasync_list = NULL;
285     ei->socket.state = SS_UNCONNECTED;
286     ei->socket.flags = 0;
287     ei->socket.ops = NULL;
288     ei->socket.sk = NULL;
289     ei->socket.file = NULL;
290     ei->socket.flags = 0;
291    
292     return &ei->vfs_inode;
293     }
294    
295     static void sock_destroy_inode(struct inode *inode)
296     {
297     kmem_cache_free(sock_inode_cachep,
298     container_of(inode, struct socket_alloc, vfs_inode));
299     }
300    
301     static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
302     {
303     struct socket_alloc *ei = (struct socket_alloc *) foo;
304    
305     if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
306     SLAB_CTOR_CONSTRUCTOR)
307     inode_init_once(&ei->vfs_inode);
308     }
309    
310     static int init_inodecache(void)
311     {
312     sock_inode_cachep = kmem_cache_create("sock_inode_cache",
313     sizeof(struct socket_alloc),
314     0, SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT,
315     init_once, NULL);
316     if (sock_inode_cachep == NULL)
317     return -ENOMEM;
318     return 0;
319     }
320    
321     static struct super_operations sockfs_ops = {
322     .alloc_inode = sock_alloc_inode,
323     .destroy_inode =sock_destroy_inode,
324     .statfs = simple_statfs,
325     };
326    
327     static struct super_block *sockfs_get_sb(struct file_system_type *fs_type,
328     int flags, const char *dev_name, void *data)
329     {
330     return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC);
331     }
332    
333     static struct vfsmount *sock_mnt;
334    
335     static struct file_system_type sock_fs_type = {
336     .name = "sockfs",
337     .get_sb = sockfs_get_sb,
338     .kill_sb = kill_anon_super,
339     };
/* ->d_delete hook for sockfs dentries: unconditionally returns 1. */
static int sockfs_delete_dentry(struct dentry *dentry)
{
	return 1;
}
344     static struct dentry_operations sockfs_dentry_operations = {
345     .d_delete = sockfs_delete_dentry,
346     };
347    
348     /*
349     * Obtains the first available file descriptor and sets it up for use.
350     *
351     * This function creates file structure and maps it to fd space
352     * of current process. On success it returns file descriptor
353     * and file struct implicitly stored in sock->file.
354     * Note that another thread may close file descriptor before we return
355     * from this function. We use the fact that now we do not refer
356     * to socket after mapping. If one day we will need it, this
357     * function will increment ref. count on file by 1.
358     *
359     * In any case returned fd MAY BE not valid!
360     * This race condition is unavoidable
361     * with shared fd spaces, we cannot solve it inside kernel,
362     * but we take care of internal coherence yet.
363     */
364    
365     int sock_map_fd(struct socket *sock)
366     {
367     int fd;
368     struct qstr this;
369     char name[32];
370    
371     /*
372     * Find a file descriptor suitable for return to the user.
373     */
374    
375     fd = get_unused_fd();
376     if (fd >= 0) {
377     struct file *file = get_empty_filp();
378    
379     if (!file) {
380     put_unused_fd(fd);
381     fd = -ENFILE;
382     goto out;
383     }
384    
385     sprintf(name, "[%lu]", SOCK_INODE(sock)->i_ino);
386     this.name = name;
387     this.len = strlen(name);
388     this.hash = SOCK_INODE(sock)->i_ino;
389    
390     file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
391     if (!file->f_dentry) {
392     put_filp(file);
393     put_unused_fd(fd);
394     fd = -ENOMEM;
395     goto out;
396     }
397     file->f_dentry->d_op = &sockfs_dentry_operations;
398     d_add(file->f_dentry, SOCK_INODE(sock));
399     file->f_vfsmnt = mntget(sock_mnt);
400     file->f_mapping = file->f_dentry->d_inode->i_mapping;
401    
402     sock->file = file;
403     file->f_op = SOCK_INODE(sock)->i_fop = &socket_file_ops;
404     file->f_mode = FMODE_READ | FMODE_WRITE;
405     file->f_flags = O_RDWR;
406     file->f_pos = 0;
407     fd_install(fd, file);
408     }
409    
410     out:
411     return fd;
412     }
413    
414     /**
415     * sockfd_lookup - Go from a file number to its socket slot
416     * @fd: file handle
417     * @err: pointer to an error code return
418     *
419     * The file handle passed in is locked and the socket it is bound
420     * too is returned. If an error occurs the err pointer is overwritten
421     * with a negative errno code and NULL is returned. The function checks
422     * for both invalid handles and passing a handle which is not a socket.
423     *
424     * On a success the socket object pointer is returned.
425     */
426    
427     struct socket *sockfd_lookup(int fd, int *err)
428     {
429     struct file *file;
430     struct inode *inode;
431     struct socket *sock;
432    
433     if (!(file = fget(fd)))
434     {
435     *err = -EBADF;
436     return NULL;
437     }
438    
439     inode = file->f_dentry->d_inode;
440     if (!S_ISSOCK(inode->i_mode)) {
441     *err = -ENOTSOCK;
442     fput(file);
443     return NULL;
444     }
445    
446     sock = SOCKET_I(inode);
447     if (sock->file != file) {
448     printk(KERN_ERR "socki_lookup: socket file changed!\n");
449     sock->file = file;
450     }
451     return sock;
452     }
453    
454     /**
455     * sock_alloc - allocate a socket
456     *
457     * Allocate a new inode and socket object. The two are bound together
458     * and initialised. The socket is then returned. If we are out of inodes
459     * NULL is returned.
460     */
461    
462     static struct socket *sock_alloc(void)
463     {
464     struct inode * inode;
465     struct socket * sock;
466    
467     inode = new_inode(sock_mnt->mnt_sb);
468     if (!inode)
469     return NULL;
470    
471     sock = SOCKET_I(inode);
472    
473     inode->i_mode = S_IFSOCK|S_IRWXUGO;
474     inode->i_uid = current->fsuid;
475     inode->i_gid = current->fsgid;
476    
477     get_cpu_var(sockets_in_use)++;
478     put_cpu_var(sockets_in_use);
479     return sock;
480     }
481    
482     /*
483     * In theory you can't get an open on this inode, but /proc provides
484     * a back door. Remember to keep it shut otherwise you'll let the
485     * creepy crawlies in.
486     */
487    
488     static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
489     {
490     return -ENXIO;
491     }
492    
493     struct file_operations bad_sock_fops = {
494     .owner = THIS_MODULE,
495     .open = sock_no_open,
496     };
497    
498     /**
499     * sock_release - close a socket
500     * @sock: socket to close
501     *
502     * The socket is released from the protocol stack if it has a release
503     * callback, and the inode is then released if the socket is bound to
504     * an inode not a file.
505     */
506    
507     void sock_release(struct socket *sock)
508     {
509     if (sock->ops) {
510     struct module *owner = sock->ops->owner;
511    
512     sock->ops->release(sock);
513     sock->ops = NULL;
514     module_put(owner);
515     }
516    
517     if (sock->fasync_list)
518     printk(KERN_ERR "sock_release: fasync list not empty!\n");
519    
520     get_cpu_var(sockets_in_use)--;
521     put_cpu_var(sockets_in_use);
522     if (!sock->file) {
523     iput(SOCK_INODE(sock));
524     return;
525     }
526     sock->file=NULL;
527     }
528    
529     static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
530     struct msghdr *msg, size_t size)
531     {
532     struct sock_iocb *si = kiocb_to_siocb(iocb);
533     int err;
534    
535     si->sock = sock;
536     si->scm = NULL;
537     si->msg = msg;
538     si->size = size;
539    
540     err = security_socket_sendmsg(sock, msg, size);
541     if (err)
542     return err;
543    
544     return sock->ops->sendmsg(iocb, sock, msg, size);
545     }
546    
547     int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
548     {
549     struct kiocb iocb;
550     struct sock_iocb siocb;
551     int ret;
552    
553     init_sync_kiocb(&iocb, NULL);
554     iocb.private = &siocb;
555     ret = __sock_sendmsg(&iocb, sock, msg, size);
556     if (-EIOCBQUEUED == ret)
557     ret = wait_on_sync_kiocb(&iocb);
558     return ret;
559     }
560    
561     int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
562     struct kvec *vec, size_t num, size_t size)
563     {
564     mm_segment_t oldfs = get_fs();
565     int result;
566    
567     set_fs(KERNEL_DS);
568     /*
569     * the following is safe, since for compiler definitions of kvec and
570     * iovec are identical, yielding the same in-core layout and alignment
571     */
572     msg->msg_iov = (struct iovec *)vec,
573     msg->msg_iovlen = num;
574     result = sock_sendmsg(sock, msg, size);
575     set_fs(oldfs);
576     return result;
577     }
578    
579     static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
580     struct msghdr *msg, size_t size, int flags)
581     {
582     int err;
583     struct sock_iocb *si = kiocb_to_siocb(iocb);
584    
585     si->sock = sock;
586     si->scm = NULL;
587     si->msg = msg;
588     si->size = size;
589     si->flags = flags;
590    
591     err = security_socket_recvmsg(sock, msg, size, flags);
592     if (err)
593     return err;
594    
595     return sock->ops->recvmsg(iocb, sock, msg, size, flags);
596     }
597    
598     int sock_recvmsg(struct socket *sock, struct msghdr *msg,
599     size_t size, int flags)
600     {
601     struct kiocb iocb;
602     struct sock_iocb siocb;
603     int ret;
604    
605     init_sync_kiocb(&iocb, NULL);
606     iocb.private = &siocb;
607     ret = __sock_recvmsg(&iocb, sock, msg, size, flags);
608     if (-EIOCBQUEUED == ret)
609     ret = wait_on_sync_kiocb(&iocb);
610     return ret;
611     }
612    
613     int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
614     struct kvec *vec, size_t num,
615     size_t size, int flags)
616     {
617     mm_segment_t oldfs = get_fs();
618     int result;
619    
620     set_fs(KERNEL_DS);
621     /*
622     * the following is safe, since for compiler definitions of kvec and
623     * iovec are identical, yielding the same in-core layout and alignment
624     */
625     msg->msg_iov = (struct iovec *)vec,
626     msg->msg_iovlen = num;
627     result = sock_recvmsg(sock, msg, size, flags);
628     set_fs(oldfs);
629     return result;
630     }
631    
632     static void sock_aio_dtor(struct kiocb *iocb)
633     {
634     kfree(iocb->private);
635     }
636    
637     /*
638     * Read data from a socket. ubuf is a user mode pointer. We make sure the user
639     * area ubuf...ubuf+size-1 is writable before asking the protocol.
640     */
641    
642     static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf,
643     size_t size, loff_t pos)
644     {
645     struct sock_iocb *x, siocb;
646     struct socket *sock;
647     int flags;
648    
649     if (pos != 0)
650     return -ESPIPE;
651     if (size==0) /* Match SYS5 behaviour */
652     return 0;
653    
654     if (is_sync_kiocb(iocb))
655     x = &siocb;
656     else {
657     x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL);
658     if (!x)
659     return -ENOMEM;
660     iocb->ki_dtor = sock_aio_dtor;
661     }
662     iocb->private = x;
663     x->kiocb = iocb;
664     sock = SOCKET_I(iocb->ki_filp->f_dentry->d_inode);
665    
666     x->async_msg.msg_name = NULL;
667     x->async_msg.msg_namelen = 0;
668     x->async_msg.msg_iov = &x->async_iov;
669     x->async_msg.msg_iovlen = 1;
670     x->async_msg.msg_control = NULL;
671     x->async_msg.msg_controllen = 0;
672     x->async_iov.iov_base = ubuf;
673     x->async_iov.iov_len = size;
674     flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
675    
676     return __sock_recvmsg(iocb, sock, &x->async_msg, size, flags);
677     }
678    
679    
680     /*
681     * Write data to a socket. We verify that the user area ubuf..ubuf+size-1
682     * is readable by the user process.
683     */
684    
685     static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf,
686     size_t size, loff_t pos)
687     {
688     struct sock_iocb *x, siocb;
689     struct socket *sock;
690    
691     if (pos != 0)
692     return -ESPIPE;
693     if(size==0) /* Match SYS5 behaviour */
694     return 0;
695    
696     if (is_sync_kiocb(iocb))
697     x = &siocb;
698     else {
699     x = kmalloc(sizeof(struct sock_iocb), GFP_KERNEL);
700     if (!x)
701     return -ENOMEM;
702     iocb->ki_dtor = sock_aio_dtor;
703     }
704     iocb->private = x;
705     x->kiocb = iocb;
706     sock = SOCKET_I(iocb->ki_filp->f_dentry->d_inode);
707    
708     x->async_msg.msg_name = NULL;
709     x->async_msg.msg_namelen = 0;
710     x->async_msg.msg_iov = &x->async_iov;
711     x->async_msg.msg_iovlen = 1;
712     x->async_msg.msg_control = NULL;
713     x->async_msg.msg_controllen = 0;
714     x->async_msg.msg_flags = !(iocb->ki_filp->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
715     if (sock->type == SOCK_SEQPACKET)
716     x->async_msg.msg_flags |= MSG_EOR;
717     x->async_iov.iov_base = (void __user *)ubuf;
718     x->async_iov.iov_len = size;
719    
720     return __sock_sendmsg(iocb, sock, &x->async_msg, size);
721     }
722    
723     ssize_t sock_sendpage(struct file *file, struct page *page,
724     int offset, size_t size, loff_t *ppos, int more)
725     {
726     struct socket *sock;
727     int flags;
728    
729     sock = SOCKET_I(file->f_dentry->d_inode);
730    
731     flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
732     if (more)
733     flags |= MSG_MORE;
734    
735     return sock->ops->sendpage(sock, page, offset, size, flags);
736     }
737    
738     static int sock_readv_writev(int type, struct inode * inode,
739     struct file * file, const struct iovec * iov,
740     long count, size_t size)
741     {
742     struct msghdr msg;
743     struct socket *sock;
744    
745     sock = SOCKET_I(inode);
746    
747     msg.msg_name = NULL;
748     msg.msg_namelen = 0;
749     msg.msg_control = NULL;
750     msg.msg_controllen = 0;
751     msg.msg_iov = (struct iovec *) iov;
752     msg.msg_iovlen = count;
753     msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
754    
755     /* read() does a VERIFY_WRITE */
756     if (type == VERIFY_WRITE)
757     return sock_recvmsg(sock, &msg, size, msg.msg_flags);
758    
759     if (sock->type == SOCK_SEQPACKET)
760     msg.msg_flags |= MSG_EOR;
761    
762     return sock_sendmsg(sock, &msg, size);
763     }
764    
765     static ssize_t sock_readv(struct file *file, const struct iovec *vector,
766     unsigned long count, loff_t *ppos)
767     {
768     size_t tot_len = 0;
769     int i;
770     for (i = 0 ; i < count ; i++)
771     tot_len += vector[i].iov_len;
772     return sock_readv_writev(VERIFY_WRITE, file->f_dentry->d_inode,
773     file, vector, count, tot_len);
774     }
775    
776     static ssize_t sock_writev(struct file *file, const struct iovec *vector,
777     unsigned long count, loff_t *ppos)
778     {
779     size_t tot_len = 0;
780     int i;
781     for (i = 0 ; i < count ; i++)
782     tot_len += vector[i].iov_len;
783     return sock_readv_writev(VERIFY_READ, file->f_dentry->d_inode,
784     file, vector, count, tot_len);
785     }
786    
787    
788     /*
789     * Atomic setting of ioctl hooks to avoid race
790     * with module unload.
791     */
792    
793     static DECLARE_MUTEX(br_ioctl_mutex);
794     static int (*br_ioctl_hook)(unsigned int cmd, void __user *arg) = NULL;
795    
796     void brioctl_set(int (*hook)(unsigned int, void __user *))
797     {
798     down(&br_ioctl_mutex);
799     br_ioctl_hook = hook;
800     up(&br_ioctl_mutex);
801     }
802     EXPORT_SYMBOL(brioctl_set);
803    
804     static DECLARE_MUTEX(vlan_ioctl_mutex);
805     static int (*vlan_ioctl_hook)(void __user *arg);
806    
807     void vlan_ioctl_set(int (*hook)(void __user *))
808     {
809     down(&vlan_ioctl_mutex);
810     vlan_ioctl_hook = hook;
811     up(&vlan_ioctl_mutex);
812     }
813     EXPORT_SYMBOL(vlan_ioctl_set);
814    
815     static DECLARE_MUTEX(dlci_ioctl_mutex);
816     static int (*dlci_ioctl_hook)(unsigned int, void __user *);
817    
818     void dlci_ioctl_set(int (*hook)(unsigned int, void __user *))
819     {
820     down(&dlci_ioctl_mutex);
821     dlci_ioctl_hook = hook;
822     up(&dlci_ioctl_mutex);
823     }
824     EXPORT_SYMBOL(dlci_ioctl_set);
825    
826     /*
827     * With an ioctl, arg may well be a user mode pointer, but we don't know
828     * what to do with it - that's up to the protocol still.
829     */
830    
831     static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
832     {
833     struct socket *sock;
834     void __user *argp = (void __user *)arg;
835     int pid, err;
836    
837     sock = SOCKET_I(file->f_dentry->d_inode);
838     if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
839     err = dev_ioctl(cmd, argp);
840     } else
841     #ifdef WIRELESS_EXT
842     if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
843     err = dev_ioctl(cmd, argp);
844     } else
845     #endif /* WIRELESS_EXT */
846     switch (cmd) {
847     case FIOSETOWN:
848     case SIOCSPGRP:
849     err = -EFAULT;
850     if (get_user(pid, (int __user *)argp))
851     break;
852     err = f_setown(sock->file, pid, 1);
853     break;
854     case FIOGETOWN:
855     case SIOCGPGRP:
856     err = put_user(sock->file->f_owner.pid, (int __user *)argp);
857     break;
858     case SIOCGIFBR:
859     case SIOCSIFBR:
860     case SIOCBRADDBR:
861     case SIOCBRDELBR:
862     err = -ENOPKG;
863     if (!br_ioctl_hook)
864     request_module("bridge");
865    
866     down(&br_ioctl_mutex);
867     if (br_ioctl_hook)
868     err = br_ioctl_hook(cmd, argp);
869     up(&br_ioctl_mutex);
870     break;
871     case SIOCGIFVLAN:
872     case SIOCSIFVLAN:
873     err = -ENOPKG;
874     if (!vlan_ioctl_hook)
875     request_module("8021q");
876    
877     down(&vlan_ioctl_mutex);
878     if (vlan_ioctl_hook)
879     err = vlan_ioctl_hook(argp);
880     up(&vlan_ioctl_mutex);
881     break;
882     case SIOCGIFDIVERT:
883     case SIOCSIFDIVERT:
884     /* Convert this to call through a hook */
885     err = divert_ioctl(cmd, argp);
886     break;
887     case SIOCADDDLCI:
888     case SIOCDELDLCI:
889     err = -ENOPKG;
890     if (!dlci_ioctl_hook)
891     request_module("dlci");
892    
893     if (dlci_ioctl_hook) {
894     down(&dlci_ioctl_mutex);
895     err = dlci_ioctl_hook(cmd, argp);
896     up(&dlci_ioctl_mutex);
897     }
898     break;
899     default:
900     err = sock->ops->ioctl(sock, cmd, arg);
901     break;
902     }
903     return err;
904     }
905    
906     int sock_create_lite(int family, int type, int protocol, struct socket **res)
907     {
908     int err;
909     struct socket *sock = NULL;
910    
911     err = security_socket_create(family, type, protocol, 1);
912     if (err)
913     goto out;
914    
915     sock = sock_alloc();
916     if (!sock) {
917     err = -ENOMEM;
918     goto out;
919     }
920    
921     security_socket_post_create(sock, family, type, protocol, 1);
922     sock->type = type;
923     out:
924     *res = sock;
925     return err;
926     }
927    
928     /* No kernel lock held - perfect */
929     static unsigned int sock_poll(struct file *file, poll_table * wait)
930     {
931     struct socket *sock;
932    
933     /*
934     * We can't return errors to poll, so it's either yes or no.
935     */
936     sock = SOCKET_I(file->f_dentry->d_inode);
937     return sock->ops->poll(file, sock, wait);
938     }
939    
940     static int sock_mmap(struct file * file, struct vm_area_struct * vma)
941     {
942     struct socket *sock = SOCKET_I(file->f_dentry->d_inode);
943    
944     return sock->ops->mmap(file, sock, vma);
945     }
946    
947     int sock_close(struct inode *inode, struct file *filp)
948     {
949     /*
950     * It was possible the inode is NULL we were
951     * closing an unfinished socket.
952     */
953    
954     if (!inode)
955     {
956     printk(KERN_DEBUG "sock_close: NULL inode\n");
957     return 0;
958     }
959     sock_fasync(-1, filp, 0);
960     sock_release(SOCKET_I(inode));
961     return 0;
962     }
963    
964     /*
965     * Update the socket async list
966     *
967     * Fasync_list locking strategy.
968     *
969     * 1. fasync_list is modified only under process context socket lock
970     * i.e. under semaphore.
971     * 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
972     * or under socket lock.
973     * 3. fasync_list can be used from softirq context, so that
974     * modification under socket lock have to be enhanced with
975     * write_lock_bh(&sk->sk_callback_lock).
976     * --ANK (990710)
977     */
978    
979     static int sock_fasync(int fd, struct file *filp, int on)
980     {
981     struct fasync_struct *fa, *fna=NULL, **prev;
982     struct socket *sock;
983     struct sock *sk;
984    
985     if (on)
986     {
987     fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
988     if(fna==NULL)
989     return -ENOMEM;
990     }
991    
992     sock = SOCKET_I(filp->f_dentry->d_inode);
993    
994     if ((sk=sock->sk) == NULL) {
995     kfree(fna);
996     return -EINVAL;
997     }
998    
999     lock_sock(sk);
1000    
1001     prev=&(sock->fasync_list);
1002    
1003     for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
1004     if (fa->fa_file==filp)
1005     break;
1006    
1007     if(on)
1008     {
1009     if(fa!=NULL)
1010     {
1011     write_lock_bh(&sk->sk_callback_lock);
1012     fa->fa_fd=fd;
1013     write_unlock_bh(&sk->sk_callback_lock);
1014    
1015     kfree(fna);
1016     goto out;
1017     }
1018     fna->fa_file=filp;
1019     fna->fa_fd=fd;
1020     fna->magic=FASYNC_MAGIC;
1021     fna->fa_next=sock->fasync_list;
1022     write_lock_bh(&sk->sk_callback_lock);
1023     sock->fasync_list=fna;
1024     write_unlock_bh(&sk->sk_callback_lock);
1025     }
1026     else
1027     {
1028     if (fa!=NULL)
1029     {
1030     write_lock_bh(&sk->sk_callback_lock);
1031     *prev=fa->fa_next;
1032     write_unlock_bh(&sk->sk_callback_lock);
1033     kfree(fa);
1034     }
1035     }
1036    
1037     out:
1038     release_sock(sock->sk);
1039     return 0;
1040     }
1041    
1042     /* This function may be called only under socket lock or callback_lock */
1043    
1044     int sock_wake_async(struct socket *sock, int how, int band)
1045     {
1046     if (!sock || !sock->fasync_list)
1047     return -1;
1048     switch (how)
1049     {
1050     case 1:
1051    
1052     if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
1053     break;
1054     goto call_kill;
1055     case 2:
1056     if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
1057     break;
1058     /* fall through */
1059     case 0:
1060     call_kill:
1061     __kill_fasync(sock->fasync_list, SIGIO, band);
1062     break;
1063     case 3:
1064     __kill_fasync(sock->fasync_list, SIGURG, band);
1065     }
1066     return 0;
1067     }
1068    
1069     static int __sock_create(int family, int type, int protocol, struct socket **res, int kern)
1070     {
1071     int err;
1072     struct socket *sock;
1073    
1074     /*
1075     * Check protocol is in range
1076     */
1077     if (family < 0 || family >= NPROTO)
1078     return -EAFNOSUPPORT;
1079     if (type < 0 || type >= SOCK_MAX)
1080     return -EINVAL;
1081    
1082     /* Compatibility.
1083    
1084     This uglymoron is moved from INET layer to here to avoid
1085     deadlock in module load.
1086     */
1087     if (family == PF_INET && type == SOCK_PACKET) {
1088     static int warned;
1089     if (!warned) {
1090     warned = 1;
1091     printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
1092     }
1093     family = PF_PACKET;
1094     }
1095    
1096     err = security_socket_create(family, type, protocol, kern);
1097     if (err)
1098     return err;
1099    
1100     #if defined(CONFIG_KMOD)
1101     /* Attempt to load a protocol module if the find failed.
1102     *
1103     * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
1104     * requested real, full-featured networking support upon configuration.
1105     * Otherwise module support will break!
1106     */
1107     if (net_families[family]==NULL)
1108     {
1109     request_module("net-pf-%d",family);
1110     }
1111     #endif
1112    
1113     net_family_read_lock();
1114     if (net_families[family] == NULL) {
1115     err = -EAFNOSUPPORT;
1116     goto out;
1117     }
1118    
1119     /*
1120     * Allocate the socket and allow the family to set things up. if
1121     * the protocol is 0, the family is instructed to select an appropriate
1122     * default.
1123     */
1124    
1125     if (!(sock = sock_alloc())) {
1126     printk(KERN_WARNING "socket: no more sockets\n");
1127     err = -ENFILE; /* Not exactly a match, but its the
1128     closest posix thing */
1129     goto out;
1130     }
1131    
1132     sock->type = type;
1133    
1134     /*
1135     * We will call the ->create function, that possibly is in a loadable
1136     * module, so we have to bump that loadable module refcnt first.
1137     */
1138     err = -EAFNOSUPPORT;
1139     if (!try_module_get(net_families[family]->owner))
1140     goto out_release;
1141    
1142     if ((err = net_families[family]->create(sock, protocol)) < 0)
1143     goto out_module_put;
1144     /*
1145     * Now to bump the refcnt of the [loadable] module that owns this
1146     * socket at sock_release time we decrement its refcnt.
1147     */
1148     if (!try_module_get(sock->ops->owner)) {
1149     sock->ops = NULL;
1150     goto out_module_put;
1151     }
1152     /*
1153     * Now that we're done with the ->create function, the [loadable]
1154     * module can have its refcnt decremented
1155     */
1156     module_put(net_families[family]->owner);
1157     *res = sock;
1158     security_socket_post_create(sock, family, type, protocol, kern);
1159    
1160     out:
1161     net_family_read_unlock();
1162     return err;
1163     out_module_put:
1164     module_put(net_families[family]->owner);
1165     out_release:
1166     sock_release(sock);
1167     goto out;
1168     }
1169    
/*
 *	Create a socket via __sock_create() with the "kern" argument
 *	set to 0.
 */
int sock_create(int family, int type, int protocol, struct socket **res)
{
	const int kern = 0;

	return __sock_create(family, type, protocol, res, kern);
}
1174    
/*
 *	Create a socket via __sock_create() with the "kern" argument
 *	set to 1.
 */
int sock_create_kern(int family, int type, int protocol, struct socket **res)
{
	const int kern = 1;

	return __sock_create(family, type, protocol, res, kern);
}
1179    
1180     asmlinkage long sys_socket(int family, int type, int protocol)
1181     {
1182     int retval;
1183     struct socket *sock;
1184    
1185     retval = sock_create(family, type, protocol, &sock);
1186     if (retval < 0)
1187     goto out;
1188    
1189     retval = sock_map_fd(sock);
1190     if (retval < 0)
1191     goto out_release;
1192    
1193     out:
1194     /* It may be already another descriptor 8) Not kernel problem. */
1195     return retval;
1196    
1197     out_release:
1198     sock_release(sock);
1199     return retval;
1200     }
1201    
1202     /*
1203     * Create a pair of connected sockets.
1204     */
1205    
1206     asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *usockvec)
1207     {
1208     struct socket *sock1, *sock2;
1209     int fd1, fd2, err;
1210    
1211     /*
1212     * Obtain the first socket and check if the underlying protocol
1213     * supports the socketpair call.
1214     */
1215    
1216     err = sock_create(family, type, protocol, &sock1);
1217     if (err < 0)
1218     goto out;
1219    
1220     err = sock_create(family, type, protocol, &sock2);
1221     if (err < 0)
1222     goto out_release_1;
1223    
1224     err = sock1->ops->socketpair(sock1, sock2);
1225     if (err < 0)
1226     goto out_release_both;
1227    
1228     fd1 = fd2 = -1;
1229    
1230     err = sock_map_fd(sock1);
1231     if (err < 0)
1232     goto out_release_both;
1233     fd1 = err;
1234    
1235     err = sock_map_fd(sock2);
1236     if (err < 0)
1237     goto out_close_1;
1238     fd2 = err;
1239    
1240     /* fd1 and fd2 may be already another descriptors.
1241     * Not kernel problem.
1242     */
1243    
1244     err = put_user(fd1, &usockvec[0]);
1245     if (!err)
1246     err = put_user(fd2, &usockvec[1]);
1247     if (!err)
1248     return 0;
1249    
1250     sys_close(fd2);
1251     sys_close(fd1);
1252     return err;
1253    
1254     out_close_1:
1255     sock_release(sock2);
1256     sys_close(fd1);
1257     return err;
1258    
1259     out_release_both:
1260     sock_release(sock2);
1261     out_release_1:
1262     sock_release(sock1);
1263     out:
1264     return err;
1265     }
1266    
1267    
1268     /*
1269     * Bind a name to a socket. Nothing much to do here since it's
1270     * the protocol's responsibility to handle the local address.
1271     *
1272     * We move the socket address to kernel space before we call
1273     * the protocol layer (having also checked the address is ok).
1274     */
1275    
1276     asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen)
1277     {
1278     struct socket *sock;
1279     char address[MAX_SOCK_ADDR];
1280     int err;
1281    
1282     if((sock = sockfd_lookup(fd,&err))!=NULL)
1283     {
1284     if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) {
1285     err = security_socket_bind(sock, (struct sockaddr *)address, addrlen);
1286     if (err) {
1287     sockfd_put(sock);
1288     return err;
1289     }
1290     err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
1291     }
1292     sockfd_put(sock);
1293     }
1294     return err;
1295     }
1296    
1297    
1298     /*
1299     * Perform a listen. Basically, we allow the protocol to do anything
1300     * necessary for a listen, and if that works, we mark the socket as
1301     * ready for listening.
1302     */
1303    
1304     int sysctl_somaxconn = SOMAXCONN;
1305    
1306     asmlinkage long sys_listen(int fd, int backlog)
1307     {
1308     struct socket *sock;
1309     int err;
1310    
1311     if ((sock = sockfd_lookup(fd, &err)) != NULL) {
1312     if ((unsigned) backlog > sysctl_somaxconn)
1313     backlog = sysctl_somaxconn;
1314    
1315     err = security_socket_listen(sock, backlog);
1316     if (err) {
1317     sockfd_put(sock);
1318     return err;
1319     }
1320    
1321     err=sock->ops->listen(sock, backlog);
1322     sockfd_put(sock);
1323     }
1324     return err;
1325     }
1326    
1327    
1328     /*
1329     * For accept, we attempt to create a new socket, set up the link
1330     * with the client, wake up the client, then return the new
1331     * connected fd. We collect the address of the connector in kernel
1332     * space and move it to user at the very end. This is unclean because
1333     * we open the socket then return an error.
1334     *
1335     * 1003.1g adds the ability to recvmsg() to query connection pending
1336     * status to recvmsg. We need to add that support in a way thats
1337     * clean when we restucture accept also.
1338     */
1339    
1340     asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen)
1341     {
1342     struct socket *sock, *newsock;
1343     int err, len;
1344     char address[MAX_SOCK_ADDR];
1345    
1346     sock = sockfd_lookup(fd, &err);
1347     if (!sock)
1348     goto out;
1349    
1350     err = -ENFILE;
1351     if (!(newsock = sock_alloc()))
1352     goto out_put;
1353    
1354     newsock->type = sock->type;
1355     newsock->ops = sock->ops;
1356    
1357     err = security_socket_accept(sock, newsock);
1358     if (err)
1359     goto out_release;
1360    
1361     /*
1362     * We don't need try_module_get here, as the listening socket (sock)
1363     * has the protocol module (sock->ops->owner) held.
1364     */
1365     __module_get(newsock->ops->owner);
1366    
1367     err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1368     if (err < 0)
1369     goto out_release;
1370    
1371     if (upeer_sockaddr) {
1372     if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
1373     err = -ECONNABORTED;
1374     goto out_release;
1375     }
1376     err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
1377     if (err < 0)
1378     goto out_release;
1379     }
1380    
1381     /* File flags are not inherited via accept() unlike another OSes. */
1382    
1383     if ((err = sock_map_fd(newsock)) < 0)
1384     goto out_release;
1385    
1386     security_socket_post_accept(sock, newsock);
1387    
1388     out_put:
1389     sockfd_put(sock);
1390     out:
1391     return err;
1392     out_release:
1393     sock_release(newsock);
1394     goto out_put;
1395     }
1396    
1397    
1398     /*
1399     * Attempt to connect to a socket with the server address. The address
1400     * is in user space so we verify it is OK and move it to kernel space.
1401     *
1402     * For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1403     * break bindings
1404     *
1405     * NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1406     * other SEQPACKET protocols that take time to connect() as it doesn't
1407     * include the -EINPROGRESS status for such sockets.
1408     */
1409    
1410     asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen)
1411     {
1412     struct socket *sock;
1413     char address[MAX_SOCK_ADDR];
1414     int err;
1415    
1416     sock = sockfd_lookup(fd, &err);
1417     if (!sock)
1418     goto out;
1419     err = move_addr_to_kernel(uservaddr, addrlen, address);
1420     if (err < 0)
1421     goto out_put;
1422    
1423     err = security_socket_connect(sock, (struct sockaddr *)address, addrlen);
1424     if (err)
1425     goto out_put;
1426    
1427     err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
1428     sock->file->f_flags);
1429     out_put:
1430     sockfd_put(sock);
1431     out:
1432     return err;
1433     }
1434    
1435     /*
1436     * Get the local address ('name') of a socket object. Move the obtained
1437     * name to user space.
1438     */
1439    
1440     asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len)
1441     {
1442     struct socket *sock;
1443     char address[MAX_SOCK_ADDR];
1444     int len, err;
1445    
1446     sock = sockfd_lookup(fd, &err);
1447     if (!sock)
1448     goto out;
1449    
1450     err = security_socket_getsockname(sock);
1451     if (err)
1452     goto out_put;
1453    
1454     err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1455     if (err)
1456     goto out_put;
1457     err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1458    
1459     out_put:
1460     sockfd_put(sock);
1461     out:
1462     return err;
1463     }
1464    
1465     /*
1466     * Get the remote address ('name') of a socket object. Move the obtained
1467     * name to user space.
1468     */
1469    
1470     asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len)
1471     {
1472     struct socket *sock;
1473     char address[MAX_SOCK_ADDR];
1474     int len, err;
1475    
1476     if ((sock = sockfd_lookup(fd, &err))!=NULL)
1477     {
1478     err = security_socket_getpeername(sock);
1479     if (err) {
1480     sockfd_put(sock);
1481     return err;
1482     }
1483    
1484     err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
1485     if (!err)
1486     err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
1487     sockfd_put(sock);
1488     }
1489     return err;
1490     }
1491    
1492     /*
1493     * Send a datagram to a given address. We move the address into kernel
1494     * space and check the user space data area is readable before invoking
1495     * the protocol.
1496     */
1497    
1498     asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flags,
1499     struct sockaddr __user *addr, int addr_len)
1500     {
1501     struct socket *sock;
1502     char address[MAX_SOCK_ADDR];
1503     int err;
1504     struct msghdr msg;
1505     struct iovec iov;
1506    
1507     sock = sockfd_lookup(fd, &err);
1508     if (!sock)
1509     goto out;
1510     iov.iov_base=buff;
1511     iov.iov_len=len;
1512     msg.msg_name=NULL;
1513     msg.msg_iov=&iov;
1514     msg.msg_iovlen=1;
1515     msg.msg_control=NULL;
1516     msg.msg_controllen=0;
1517     msg.msg_namelen=0;
1518     if(addr)
1519     {
1520     err = move_addr_to_kernel(addr, addr_len, address);
1521     if (err < 0)
1522     goto out_put;
1523     msg.msg_name=address;
1524     msg.msg_namelen=addr_len;
1525     }
1526     if (sock->file->f_flags & O_NONBLOCK)
1527     flags |= MSG_DONTWAIT;
1528     msg.msg_flags = flags;
1529     err = sock_sendmsg(sock, &msg, len);
1530    
1531     out_put:
1532     sockfd_put(sock);
1533     out:
1534     return err;
1535     }
1536    
1537     /*
1538     * Send a datagram down a socket.
1539     */
1540    
1541     asmlinkage long sys_send(int fd, void __user * buff, size_t len, unsigned flags)
1542     {
1543     return sys_sendto(fd, buff, len, flags, NULL, 0);
1544     }
1545    
1546     /*
1547     * Receive a frame from the socket and optionally record the address of the
1548     * sender. We verify the buffers are writable and if needed move the
1549     * sender address from kernel to user space.
1550     */
1551    
1552     asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned flags,
1553     struct sockaddr __user *addr, int __user *addr_len)
1554     {
1555     struct socket *sock;
1556     struct iovec iov;
1557     struct msghdr msg;
1558     char address[MAX_SOCK_ADDR];
1559     int err,err2;
1560    
1561     sock = sockfd_lookup(fd, &err);
1562     if (!sock)
1563     goto out;
1564    
1565     msg.msg_control=NULL;
1566     msg.msg_controllen=0;
1567     msg.msg_iovlen=1;
1568     msg.msg_iov=&iov;
1569     iov.iov_len=size;
1570     iov.iov_base=ubuf;
1571     msg.msg_name=address;
1572     msg.msg_namelen=MAX_SOCK_ADDR;
1573     if (sock->file->f_flags & O_NONBLOCK)
1574     flags |= MSG_DONTWAIT;
1575     err=sock_recvmsg(sock, &msg, size, flags);
1576    
1577     if(err >= 0 && addr != NULL)
1578     {
1579     err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1580     if(err2<0)
1581     err=err2;
1582     }
1583     sockfd_put(sock);
1584     out:
1585     return err;
1586     }
1587    
1588     /*
1589     * Receive a datagram from a socket.
1590     */
1591    
1592     asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags)
1593     {
1594     return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1595     }
1596    
1597     /*
1598     * Set a socket option. Because we don't know the option lengths we have
1599     * to pass the user mode parameter for the protocols to sort out.
1600     */
1601    
1602     asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen)
1603     {
1604     int err;
1605     struct socket *sock;
1606    
1607     if (optlen < 0)
1608     return -EINVAL;
1609    
1610     if ((sock = sockfd_lookup(fd, &err))!=NULL)
1611     {
1612     err = security_socket_setsockopt(sock,level,optname);
1613     if (err) {
1614     sockfd_put(sock);
1615     return err;
1616     }
1617    
1618     if (level == SOL_SOCKET)
1619     err=sock_setsockopt(sock,level,optname,optval,optlen);
1620     else
1621     err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
1622     sockfd_put(sock);
1623     }
1624     return err;
1625     }
1626    
1627     /*
1628     * Get a socket option. Because we don't know the option lengths we have
1629     * to pass a user mode parameter for the protocols to sort out.
1630     */
1631    
1632     asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen)
1633     {
1634     int err;
1635     struct socket *sock;
1636    
1637     if ((sock = sockfd_lookup(fd, &err))!=NULL)
1638     {
1639     err = security_socket_getsockopt(sock, level,
1640     optname);
1641     if (err) {
1642     sockfd_put(sock);
1643     return err;
1644     }
1645    
1646     if (level == SOL_SOCKET)
1647     err=sock_getsockopt(sock,level,optname,optval,optlen);
1648     else
1649     err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
1650     sockfd_put(sock);
1651     }
1652     return err;
1653     }
1654    
1655    
1656     /*
1657     * Shutdown a socket.
1658     */
1659    
1660     asmlinkage long sys_shutdown(int fd, int how)
1661     {
1662     int err;
1663     struct socket *sock;
1664    
1665     if ((sock = sockfd_lookup(fd, &err))!=NULL)
1666     {
1667     err = security_socket_shutdown(sock, how);
1668     if (err) {
1669     sockfd_put(sock);
1670     return err;
1671     }
1672    
1673     err=sock->ops->shutdown(sock, how);
1674     sockfd_put(sock);
1675     }
1676     return err;
1677     }
1678    
/* Helpers that yield the address of a msghdr field in either the native
 * or the compat layout; the fields they are used for have the same type
 * (int / unsigned) on our platforms.
 *
 * They expand in the caller's scope and expect a local "flags" plus, next
 * to the pointer <msg>, a pointer named <msg>_compat.
 */
#define COMPAT_MSG(msg, member) \
	((flags & MSG_CMSG_COMPAT) ? &msg##_compat->member : &msg->member)
#define COMPAT_NAMELEN(msg)	COMPAT_MSG(msg, msg_namelen)
#define COMPAT_FLAGS(msg)	COMPAT_MSG(msg, msg_flags)
1685    
1686    
1687     /*
1688     * BSD sendmsg interface
1689     */
1690    
1691     asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags)
1692     {
1693     struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg;
1694     struct socket *sock;
1695     char address[MAX_SOCK_ADDR];
1696     struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1697     unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */
1698     unsigned char *ctl_buf = ctl;
1699     struct msghdr msg_sys;
1700     int err, ctl_len, iov_size, total_len;
1701    
1702     err = -EFAULT;
1703     if (MSG_CMSG_COMPAT & flags) {
1704     if (get_compat_msghdr(&msg_sys, msg_compat))
1705     return -EFAULT;
1706     } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr)))
1707     return -EFAULT;
1708    
1709     sock = sockfd_lookup(fd, &err);
1710     if (!sock)
1711     goto out;
1712    
1713     /* do not move before msg_sys is valid */
1714     err = -EMSGSIZE;
1715     if (msg_sys.msg_iovlen > UIO_MAXIOV)
1716     goto out_put;
1717    
1718     /* Check whether to allocate the iovec area*/
1719     err = -ENOMEM;
1720     iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1721     if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1722     iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1723     if (!iov)
1724     goto out_put;
1725     }
1726    
1727     /* This will also move the address data into kernel space */
1728     if (MSG_CMSG_COMPAT & flags) {
1729     err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ);
1730     } else
1731     err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
1732     if (err < 0)
1733     goto out_freeiov;
1734     total_len = err;
1735    
1736     err = -ENOBUFS;
1737    
1738     if (msg_sys.msg_controllen > INT_MAX)
1739     goto out_freeiov;
1740     ctl_len = msg_sys.msg_controllen;
1741     if ((MSG_CMSG_COMPAT & flags) && ctl_len) {
1742     err = cmsghdr_from_user_compat_to_kern(&msg_sys, ctl, sizeof(ctl));
1743     if (err)
1744     goto out_freeiov;
1745     ctl_buf = msg_sys.msg_control;
1746     } else if (ctl_len) {
1747     if (ctl_len > sizeof(ctl))
1748     {
1749     ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
1750     if (ctl_buf == NULL)
1751     goto out_freeiov;
1752     }
1753     err = -EFAULT;
1754     /*
1755     * Careful! Before this, msg_sys.msg_control contains a user pointer.
1756     * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1757     * checking falls down on this.
1758     */
1759     if (copy_from_user(ctl_buf, (void __user *) msg_sys.msg_control, ctl_len))
1760     goto out_freectl;
1761     msg_sys.msg_control = ctl_buf;
1762     }
1763     msg_sys.msg_flags = flags;
1764    
1765     if (sock->file->f_flags & O_NONBLOCK)
1766     msg_sys.msg_flags |= MSG_DONTWAIT;
1767     err = sock_sendmsg(sock, &msg_sys, total_len);
1768    
1769     out_freectl:
1770     if (ctl_buf != ctl)
1771     sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1772     out_freeiov:
1773     if (iov != iovstack)
1774     sock_kfree_s(sock->sk, iov, iov_size);
1775     out_put:
1776     sockfd_put(sock);
1777     out:
1778     return err;
1779     }
1780    
1781     /*
1782     * BSD recvmsg interface
1783     */
1784    
1785     asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flags)
1786     {
1787     struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg;
1788     struct socket *sock;
1789     struct iovec iovstack[UIO_FASTIOV];
1790     struct iovec *iov=iovstack;
1791     struct msghdr msg_sys;
1792     unsigned long cmsg_ptr;
1793     int err, iov_size, total_len, len;
1794    
1795     /* kernel mode address */
1796     char addr[MAX_SOCK_ADDR];
1797    
1798     /* user mode address pointers */
1799     struct sockaddr __user *uaddr;
1800     int __user *uaddr_len;
1801    
1802     if (MSG_CMSG_COMPAT & flags) {
1803     if (get_compat_msghdr(&msg_sys, msg_compat))
1804     return -EFAULT;
1805     } else
1806     if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1807     return -EFAULT;
1808    
1809     sock = sockfd_lookup(fd, &err);
1810     if (!sock)
1811     goto out;
1812    
1813     err = -EMSGSIZE;
1814     if (msg_sys.msg_iovlen > UIO_MAXIOV)
1815     goto out_put;
1816    
1817     /* Check whether to allocate the iovec area*/
1818     err = -ENOMEM;
1819     iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1820     if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1821     iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1822     if (!iov)
1823     goto out_put;
1824     }
1825    
1826     /*
1827     * Save the user-mode address (verify_iovec will change the
1828     * kernel msghdr to use the kernel address space)
1829     */
1830    
1831     uaddr = (void __user *) msg_sys.msg_name;
1832     uaddr_len = COMPAT_NAMELEN(msg);
1833     if (MSG_CMSG_COMPAT & flags) {
1834     err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1835     } else
1836     err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1837     if (err < 0)
1838     goto out_freeiov;
1839     total_len=err;
1840    
1841     cmsg_ptr = (unsigned long)msg_sys.msg_control;
1842     msg_sys.msg_flags = 0;
1843     if (MSG_CMSG_COMPAT & flags)
1844     msg_sys.msg_flags = MSG_CMSG_COMPAT;
1845    
1846     if (sock->file->f_flags & O_NONBLOCK)
1847     flags |= MSG_DONTWAIT;
1848     err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1849     if (err < 0)
1850     goto out_freeiov;
1851     len = err;
1852    
1853     if (uaddr != NULL) {
1854     err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
1855     if (err < 0)
1856     goto out_freeiov;
1857     }
1858     err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg));
1859     if (err)
1860     goto out_freeiov;
1861     if (MSG_CMSG_COMPAT & flags)
1862     err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr,
1863     &msg_compat->msg_controllen);
1864     else
1865     err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr,
1866     &msg->msg_controllen);
1867     if (err)
1868     goto out_freeiov;
1869     err = len;
1870    
1871     out_freeiov:
1872     if (iov != iovstack)
1873     sock_kfree_s(sock->sk, iov, iov_size);
1874     out_put:
1875     sockfd_put(sock);
1876     out:
1877     return err;
1878     }
1879    
#ifdef __ARCH_WANT_SYS_SOCKETCALL

/*
 * Argument list sizes for sys_socketcall: entry [call] is the number of
 * bytes of arguments to copy from user space for that call number.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static unsigned char nargs[18] = {
	AL(0), AL(3), AL(3), AL(3), AL(2), AL(3),
	AL(3), AL(3), AL(4), AL(4), AL(4), AL(6),
	AL(6), AL(2), AL(5), AL(5), AL(3), AL(3)
};
#undef AL

/*
 *	System call vectors.
 *
 *	Multiplexer for the socket system calls: "call" selects the
 *	operation and "args" points to its arguments, an array of
 *	unsigned long in user space.
 *
 *	Returns the result of the selected call, -EINVAL for a call
 *	number outside 1..SYS_RECVMSG, or -EFAULT if the arguments
 *	cannot be copied in.
 */

asmlinkage long sys_socketcall(int call, unsigned long __user *args)
{
	unsigned long a[6];
	int err;

	if (call < 1 || call > SYS_RECVMSG)
		return -EINVAL;

	/* copy_from_user should be SMP safe. */
	if (copy_from_user(a, args, nargs[call]))
		return -EFAULT;

	switch (call) {
	case SYS_SOCKET:
		err = sys_socket(a[0], a[1], a[2]);
		break;
	case SYS_BIND:
		err = sys_bind(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_CONNECT:
		err = sys_connect(a[0], (struct sockaddr __user *)a[1], a[2]);
		break;
	case SYS_LISTEN:
		err = sys_listen(a[0], a[1]);
		break;
	case SYS_ACCEPT:
		err = sys_accept(a[0], (struct sockaddr __user *)a[1],
				 (int __user *)a[2]);
		break;
	case SYS_GETSOCKNAME:
		err = sys_getsockname(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_GETPEERNAME:
		err = sys_getpeername(a[0], (struct sockaddr __user *)a[1],
				      (int __user *)a[2]);
		break;
	case SYS_SOCKETPAIR:
		err = sys_socketpair(a[0], a[1], a[2], (int __user *)a[3]);
		break;
	case SYS_SEND:
		err = sys_send(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_SENDTO:
		err = sys_sendto(a[0], (void __user *)a[1], a[2], a[3],
				 (struct sockaddr __user *)a[4], a[5]);
		break;
	case SYS_RECV:
		err = sys_recv(a[0], (void __user *)a[1], a[2], a[3]);
		break;
	case SYS_RECVFROM:
		err = sys_recvfrom(a[0], (void __user *)a[1], a[2], a[3],
				   (struct sockaddr __user *)a[4],
				   (int __user *)a[5]);
		break;
	case SYS_SHUTDOWN:
		err = sys_shutdown(a[0], a[1]);
		break;
	case SYS_SETSOCKOPT:
		err = sys_setsockopt(a[0], a[1], a[2], (char __user *)a[3], a[4]);
		break;
	case SYS_GETSOCKOPT:
		err = sys_getsockopt(a[0], a[1], a[2], (char __user *)a[3],
				     (int __user *)a[4]);
		break;
	case SYS_SENDMSG:
		err = sys_sendmsg(a[0], (struct msghdr __user *) a[1], a[2]);
		break;
	case SYS_RECVMSG:
		err = sys_recvmsg(a[0], (struct msghdr __user *) a[1], a[2]);
		break;
	default:
		err = -EINVAL;
		break;
	}
	return err;
}

#endif /* __ARCH_WANT_SYS_SOCKETCALL */
1976    
1977     /*
1978     * This function is called by a protocol handler that wants to
1979     * advertise its address family, and have it linked into the
1980     * SOCKET module.
1981     */
1982    
1983     int sock_register(struct net_proto_family *ops)
1984     {
1985     int err;
1986    
1987     if (ops->family >= NPROTO) {
1988     printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1989     return -ENOBUFS;
1990     }
1991     net_family_write_lock();
1992     err = -EEXIST;
1993     if (net_families[ops->family] == NULL) {
1994     net_families[ops->family]=ops;
1995     err = 0;
1996     }
1997     net_family_write_unlock();
1998     printk(KERN_INFO "NET: Registered protocol family %d\n",
1999     ops->family);
2000     return err;
2001     }
2002    
2003     /*
2004     * This function is called by a protocol handler that wants to
2005     * remove its address family, and have it unlinked from the
2006     * SOCKET module.
2007     */
2008    
2009     int sock_unregister(int family)
2010     {
2011     if (family < 0 || family >= NPROTO)
2012     return -1;
2013    
2014     net_family_write_lock();
2015     net_families[family]=NULL;
2016     net_family_write_unlock();
2017     printk(KERN_INFO "NET: Unregistered protocol family %d\n",
2018     family);
2019     return 0;
2020     }
2021    
2022    
2023     extern void sk_init(void);
2024    
2025     void __init sock_init(void)
2026     {
2027     /*
2028     * Initialize sock SLAB cache.
2029     */
2030    
2031     sk_init();
2032    
2033     #ifdef SLAB_SKB
2034     /*
2035     * Initialize skbuff SLAB cache
2036     */
2037     skb_init();
2038     #endif
2039    
2040     /*
2041     * Initialize the protocols module.
2042     */
2043    
2044     init_inodecache();
2045     register_filesystem(&sock_fs_type);
2046     sock_mnt = kern_mount(&sock_fs_type);
2047     /* The real protocol initialization is performed when
2048     * do_initcalls is run.
2049     */
2050    
2051     #ifdef CONFIG_NETFILTER
2052     netfilter_init();
2053     #endif
2054     }
2055    
#ifdef CONFIG_PROC_FS
/*
 *	Print the number of sockets in use, summed over the per-CPU
 *	counters, to the seq_file.
 */
void socket_seq_show(struct seq_file *seq)
{
	int total = 0;
	int cpu = 0;

	while (cpu < NR_CPUS) {
		total += per_cpu(sockets_in_use, cpu);
		cpu++;
	}

	/* It can be negative, by the way. 8) */
	if (total < 0)
		total = 0;

	seq_printf(seq, "sockets: used %d\n", total);
}
#endif /* CONFIG_PROC_FS */
2072    
/* ABI emulation layers need these two */
EXPORT_SYMBOL(move_addr_to_kernel);
EXPORT_SYMBOL(move_addr_to_user);

/* Remaining socket-layer symbols exported from this file */
EXPORT_SYMBOL(sock_create);
EXPORT_SYMBOL(sock_create_kern);
EXPORT_SYMBOL(sock_create_lite);
EXPORT_SYMBOL(sock_map_fd);
EXPORT_SYMBOL(sock_recvmsg);
EXPORT_SYMBOL(sock_register);
EXPORT_SYMBOL(sock_release);
EXPORT_SYMBOL(sock_sendmsg);
EXPORT_SYMBOL(sock_unregister);
EXPORT_SYMBOL(sock_wake_async);
EXPORT_SYMBOL(sockfd_lookup);
EXPORT_SYMBOL(kernel_sendmsg);
EXPORT_SYMBOL(kernel_recvmsg);