1 /*
2  *  linux/kernel/sys.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  */
6 
7 #include <linux/module.h>
8 #include <linux/mm.h>
9 #include <linux/utsname.h>
10 #include <linux/mman.h>
11 #include <linux/smp_lock.h>
12 #include <linux/notifier.h>
13 #include <linux/reboot.h>
14 #include <linux/prctl.h>
15 #include <linux/init.h>
16 #include <linux/highuid.h>
17 
18 #include <asm/uaccess.h>
19 #include <asm/io.h>
20 
21 /*
22  * this is where the system-wide overflow UID and GID are defined, for
23  * architectures that now have 32-bit UID/GID but didn't in the past
24  */
25 
26 int overflowuid = DEFAULT_OVERFLOWUID;
27 int overflowgid = DEFAULT_OVERFLOWGID;
28 
29 /*
30  * the same as above, but for filesystems which can only store a 16-bit
31  * UID and GID. as such, this is needed on all architectures
32  */
33 
34 int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
35 int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;
36 
/*
 * Ctrl-alt-del policy, consulted by ctrl_alt_del():
 *
 *   C_A_D   - non-zero: schedule a restart; zero: send SIGINT to cad_pid
 *             instead.  The default is to restart.
 *   cad_pid - pid that receives the SIGINT when C_A_D is zero.
 */

int C_A_D = 1;          /* restart on ctrl-alt-del by default */
int cad_pid = 1;        /* SIGINT target when C_A_D is off */
43 
44 
45 /*
46  *      Notifier list for kernel code which wants to be called
47  *      at shutdown. This is used to stop any idling DMA operations
48  *      and the like. 
49  */
50 
51 static struct notifier_block *reboot_notifier_list;
52 rwlock_t notifier_lock = RW_LOCK_UNLOCKED;
53 
54 /**
55  *      notifier_chain_register - Add notifier to a notifier chain
56  *      @list: Pointer to root list pointer
57  *      @n: New entry in notifier chain
58  *
59  *      Adds a notifier to a notifier chain.
60  *
61  *      Currently always returns zero.
62  */
63  
64 int notifier_chain_register(struct notifier_block **list, struct notifier_block *n)
65 {
66         write_lock(&notifier_lock);
67         while(*list)
68         {
69                 if(n->priority > (*list)->priority)
70                         break;
71                 list= &((*list)->next);
72         }
73         n->next = *list;
74         *list=n;
75         write_unlock(&notifier_lock);
76         return 0;
77 }
78 
79 /**
80  *      notifier_chain_unregister - Remove notifier from a notifier chain
81  *      @nl: Pointer to root list pointer
82  *      @n: New entry in notifier chain
83  *
84  *      Removes a notifier from a notifier chain.
85  *
86  *      Returns zero on success, or %-ENOENT on failure.
87  */
88  
89 int notifier_chain_unregister(struct notifier_block **nl, struct notifier_block *n)
90 {
91         write_lock(&notifier_lock);
92         while((*nl)!=NULL)
93         {
94                 if((*nl)==n)
95                 {
96                         *nl=n->next;
97                         write_unlock(&notifier_lock);
98                         return 0;
99                 }
100                 nl=&((*nl)->next);
101         }
102         write_unlock(&notifier_lock);
103         return -ENOENT;
104 }
105 
106 /**
107  *      notifier_call_chain - Call functions in a notifier chain
108  *      @n: Pointer to root pointer of notifier chain
109  *      @val: Value passed unmodified to notifier function
110  *      @v: Pointer passed unmodified to notifier function
111  *
112  *      Calls each function in a notifier chain in turn.
113  *
114  *      If the return value of the notifier can be and'd
115  *      with %NOTIFY_STOP_MASK, then notifier_call_chain
116  *      will return immediately, with the return value of
117  *      the notifier function which halted execution.
118  *      Otherwise, the return value is the return value
119  *      of the last notifier function called.
120  */
121  
122 int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
123 {
124         int ret=NOTIFY_DONE;
125         struct notifier_block *nb = *n;
126 
127         while(nb)
128         {
129                 ret=nb->notifier_call(nb,val,v);
130                 if(ret&NOTIFY_STOP_MASK)
131                 {
132                         return ret;
133                 }
134                 nb=nb->next;
135         }
136         return ret;
137 }
138 
139 /**
140  *      register_reboot_notifier - Register function to be called at reboot time
141  *      @nb: Info about notifier function to be called
142  *
143  *      Registers a function with the list of functions
144  *      to be called at reboot time.
145  *
146  *      Currently always returns zero, as notifier_chain_register
147  *      always returns zero.
148  */
149  
150 int register_reboot_notifier(struct notifier_block * nb)
151 {
152         return notifier_chain_register(&reboot_notifier_list, nb);
153 }
154 
155 /**
156  *      unregister_reboot_notifier - Unregister previously registered reboot notifier
157  *      @nb: Hook to be unregistered
158  *
159  *      Unregisters a previously registered reboot
160  *      notifier function.
161  *
162  *      Returns zero on success, or %-ENOENT on failure.
163  */
164  
165 int unregister_reboot_notifier(struct notifier_block * nb)
166 {
167         return notifier_chain_unregister(&reboot_notifier_list, nb);
168 }
169 
170 asmlinkage long sys_ni_syscall(void)
171 {
172         return -ENOSYS;
173 }
174 
175 static int proc_sel(struct task_struct *p, int which, int who)
176 {
177         if(p->pid)
178         {
179                 switch (which) {
180                         case PRIO_PROCESS:
181                                 if (!who && p == current)
182                                         return 1;
183                                 return(p->pid == who);
184                         case PRIO_PGRP:
185                                 if (!who)
186                                         who = current->pgrp;
187                                 return(p->pgrp == who);
188                         case PRIO_USER:
189                                 if (!who)
190                                         who = current->uid;
191                                 return(p->uid == who);
192                 }
193         }
194         return 0;
195 }
196 
197 asmlinkage long sys_setpriority(int which, int who, int niceval)
198 {
199         struct task_struct *p;
200         int error;
201 
202         if (which > 2 || which < 0)
203                 return -EINVAL;
204 
205         /* normalize: avoid signed division (rounding problems) */
206         error = -ESRCH;
207         if (niceval < -20)
208                 niceval = -20;
209         if (niceval > 19)
210                 niceval = 19;
211 
212         read_lock(&tasklist_lock);
213         for_each_task(p) {
214                 if (!proc_sel(p, which, who))
215                         continue;
216                 if (p->uid != current->euid &&
217                         p->uid != current->uid && !capable(CAP_SYS_NICE)) {
218                         error = -EPERM;
219                         continue;
220                 }
221                 if (error == -ESRCH)
222                         error = 0;
223                 if (niceval < p->nice && !capable(CAP_SYS_NICE))
224                         error = -EACCES;
225                 else
226                         p->nice = niceval;
227         }
228         read_unlock(&tasklist_lock);
229 
230         return error;
231 }
232 
233 /*
234  * Ugh. To avoid negative return values, "getpriority()" will
235  * not return the normal nice-value, but a negated value that
236  * has been offset by 20 (ie it returns 40..1 instead of -20..19)
237  * to stay compatible.
238  */
239 asmlinkage long sys_getpriority(int which, int who)
240 {
241         struct task_struct *p;
242         long retval = -ESRCH;
243 
244         if (which > 2 || which < 0)
245                 return -EINVAL;
246 
247         read_lock(&tasklist_lock);
248         for_each_task (p) {
249                 long niceval;
250                 if (!proc_sel(p, which, who))
251                         continue;
252                 niceval = 20 - p->nice;
253                 if (niceval > retval)
254                         retval = niceval;
255         }
256         read_unlock(&tasklist_lock);
257 
258         return retval;
259 }
260 
261 
262 /*
263  * Reboot system call: for obvious reasons only root may call it,
264  * and even root needs to set up some magic numbers in the registers
265  * so that some mistake won't make this reboot the whole machine.
266  * You can also set the meaning of the ctrl-alt-del-key here.
267  *
268  * reboot doesn't sync: do that yourself before calling this.
269  */
270 asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void * arg)
271 {
272         char buffer[256];
273 
274         /* We only trust the superuser with rebooting the system. */
275         if (!capable(CAP_SYS_BOOT))
276                 return -EPERM;
277 
278         /* For safety, we require "magic" arguments. */
279         if (magic1 != LINUX_REBOOT_MAGIC1 ||
280             (magic2 != LINUX_REBOOT_MAGIC2 && magic2 != LINUX_REBOOT_MAGIC2A &&
281                         magic2 != LINUX_REBOOT_MAGIC2B))
282                 return -EINVAL;
283 
284         lock_kernel();
285         switch (cmd) {
286         case LINUX_REBOOT_CMD_RESTART:
287                 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
288                 printk(KERN_EMERG "Restarting system.\n");
289                 machine_restart(NULL);
290                 break;
291 
292         case LINUX_REBOOT_CMD_CAD_ON:
293                 C_A_D = 1;
294                 break;
295 
296         case LINUX_REBOOT_CMD_CAD_OFF:
297                 C_A_D = 0;
298                 break;
299 
300         case LINUX_REBOOT_CMD_HALT:
301                 notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL);
302                 printk(KERN_EMERG "System halted.\n");
303                 machine_halt();
304                 do_exit(0);
305                 break;
306 
307         case LINUX_REBOOT_CMD_POWER_OFF:
308                 notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL);
309                 printk(KERN_EMERG "Power down.\n");
310                 machine_power_off();
311                 do_exit(0);
312                 break;
313 
314         case LINUX_REBOOT_CMD_RESTART2:
315                 if (strncpy_from_user(&buffer[0], (char *)arg, sizeof(buffer) - 1) < 0) {
316                         unlock_kernel();
317                         return -EFAULT;
318                 }
319                 buffer[sizeof(buffer) - 1] = '\0';
320 
321                 notifier_call_chain(&reboot_notifier_list, SYS_RESTART, buffer);
322                 printk(KERN_EMERG "Restarting system with command '%s'.\n", buffer);
323                 machine_restart(buffer);
324                 break;
325 
326         default:
327                 unlock_kernel();
328                 return -EINVAL;
329         }
330         unlock_kernel();
331         return 0;
332 }
333 
334 static void deferred_cad(void *dummy)
335 {
336         notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL);
337         machine_restart(NULL);
338 }
339 
340 /*
341  * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
342  * As it's called within an interrupt, it may NOT sync: the only choice
343  * is whether to reboot at once, or just ignore the ctrl-alt-del.
344  */
345 void ctrl_alt_del(void)
346 {
347         static struct tq_struct cad_tq = {
348                 routine: deferred_cad,
349         };
350 
351         if (C_A_D)
352                 schedule_task(&cad_tq);
353         else
354                 kill_proc(cad_pid, SIGINT, 1);
355 }
356         
357 
358 /*
359  * Unprivileged users may change the real gid to the effective gid
360  * or vice versa.  (BSD-style)
361  *
362  * If you set the real gid at all, or set the effective gid to a value not
363  * equal to the real gid, then the saved gid is set to the new effective gid.
364  *
365  * This makes it possible for a setgid program to completely drop its
366  * privileges, which is often a useful assertion to make when you are doing
367  * a security audit over a program.
368  *
369  * The general idea is that a program which uses just setregid() will be
370  * 100% compatible with BSD.  A program which uses just setgid() will be
371  * 100% compatible with POSIX with saved IDs. 
372  *
 * SMP: There are no races; the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
375  */
376 asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
377 {
378         int old_rgid = current->gid;
379         int old_egid = current->egid;
380         int new_rgid = old_rgid;
381         int new_egid = old_egid;
382 
383         if (rgid != (gid_t) -1) {
384                 if ((old_rgid == rgid) ||
385                     (current->egid==rgid) ||
386                     capable(CAP_SETGID))
387                         new_rgid = rgid;
388                 else
389                         return -EPERM;
390         }
391         if (egid != (gid_t) -1) {
392                 if ((old_rgid == egid) ||
393                     (current->egid == egid) ||
394                     (current->sgid == egid) ||
395                     capable(CAP_SETGID))
396                         new_egid = egid;
397                 else {
398                         return -EPERM;
399                 }
400         }
401         if (new_egid != old_egid)
402         {
403                 current->mm->dumpable = 0;
404                 wmb();
405         }
406         if (rgid != (gid_t) -1 ||
407             (egid != (gid_t) -1 && egid != old_rgid))
408                 current->sgid = new_egid;
409         current->fsgid = new_egid;
410         current->egid = new_egid;
411         current->gid = new_rgid;
412         return 0;
413 }
414 
415 /*
416  * setgid() is implemented like SysV w/ SAVED_IDS 
417  *
418  * SMP: Same implicit races as above.
419  */
420 asmlinkage long sys_setgid(gid_t gid)
421 {
422         int old_egid = current->egid;
423 
424         if (capable(CAP_SETGID))
425         {
426                 if(old_egid != gid)
427                 {
428                         current->mm->dumpable=0;
429                         wmb();
430                 }
431                 current->gid = current->egid = current->sgid = current->fsgid = gid;
432         }
433         else if ((gid == current->gid) || (gid == current->sgid))
434         {
435                 if(old_egid != gid)
436                 {
437                         current->mm->dumpable=0;
438                         wmb();
439                 }
440                 current->egid = current->fsgid = gid;
441         }
442         else
443                 return -EPERM;
444         return 0;
445 }
446   
447 /* 
448  * cap_emulate_setxuid() fixes the effective / permitted capabilities of
449  * a process after a call to setuid, setreuid, or setresuid.
450  *
451  *  1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
452  *  {r,e,s}uid != 0, the permitted and effective capabilities are
453  *  cleared.
454  *
455  *  2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
456  *  capabilities of the process are cleared.
457  *
458  *  3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
459  *  capabilities are set to the permitted capabilities.
460  *
461  *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should 
462  *  never happen.
463  *
464  *  -astor 
465  *
466  * cevans - New behaviour, Oct '99
467  * A process may, via prctl(), elect to keep its capabilities when it
468  * calls setuid() and switches away from uid==0. Both permitted and
469  * effective sets will be retained.
470  * Without this change, it was impossible for a daemon to drop only some
471  * of its privilege. The call to setuid(!=0) would drop all privileges!
472  * Keeping uid 0 is not an option because uid 0 owns too many vital
473  * files..
474  * Thanks to Olaf Kirch and Peter Benie for spotting this.
475  */
476 static inline void cap_emulate_setxuid(int old_ruid, int old_euid, 
477                                        int old_suid)
478 {
479         if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) &&
480             (current->uid != 0 && current->euid != 0 && current->suid != 0) &&
481             !current->keep_capabilities) {
482                 cap_clear(current->cap_permitted);
483                 cap_clear(current->cap_effective);
484         }
485         if (old_euid == 0 && current->euid != 0) {
486                 cap_clear(current->cap_effective);
487         }
488         if (old_euid != 0 && current->euid == 0) {
489                 current->cap_effective = current->cap_permitted;
490         }
491 }
492 
493 static int set_user(uid_t new_ruid, int dumpclear)
494 {
495         struct user_struct *new_user, *old_user;
496 
497         /* What if a process setreuid()'s and this brings the
498          * new uid over his NPROC rlimit?  We can check this now
499          * cheaply with the new uid cache, so if it matters
500          * we should be checking for it.  -DaveM
501          */
502         new_user = alloc_uid(new_ruid);
503         if (!new_user)
504                 return -EAGAIN;
505         old_user = current->user;
506         atomic_dec(&old_user->processes);
507         atomic_inc(&new_user->processes);
508 
509         if(dumpclear)
510         {
511                 current->mm->dumpable = 0;
512                 wmb();
513         }
514         current->uid = new_ruid;
515         current->user = new_user;
516         free_uid(old_user);
517         return 0;
518 }
519 
520 /*
521  * Unprivileged users may change the real uid to the effective uid
522  * or vice versa.  (BSD-style)
523  *
524  * If you set the real uid at all, or set the effective uid to a value not
525  * equal to the real uid, then the saved uid is set to the new effective uid.
526  *
527  * This makes it possible for a setuid program to completely drop its
528  * privileges, which is often a useful assertion to make when you are doing
529  * a security audit over a program.
530  *
531  * The general idea is that a program which uses just setreuid() will be
532  * 100% compatible with BSD.  A program which uses just setuid() will be
533  * 100% compatible with POSIX with saved IDs. 
534  */
535 asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
536 {
537         int old_ruid, old_euid, old_suid, new_ruid, new_euid;
538 
539         new_ruid = old_ruid = current->uid;
540         new_euid = old_euid = current->euid;
541         old_suid = current->suid;
542 
543         if (ruid != (uid_t) -1) {
544                 new_ruid = ruid;
545                 if ((old_ruid != ruid) &&
546                     (current->euid != ruid) &&
547                     !capable(CAP_SETUID))
548                         return -EPERM;
549         }
550 
551         if (euid != (uid_t) -1) {
552                 new_euid = euid;
553                 if ((old_ruid != euid) &&
554                     (current->euid != euid) &&
555                     (current->suid != euid) &&
556                     !capable(CAP_SETUID))
557                         return -EPERM;
558         }
559 
560         if (new_ruid != old_ruid && set_user(new_ruid, new_euid != old_euid) < 0)
561                 return -EAGAIN;
562 
563         if (new_euid != old_euid)
564         {
565                 current->mm->dumpable=0;
566                 wmb();
567         }
568         current->fsuid = current->euid = new_euid;
569         if (ruid != (uid_t) -1 ||
570             (euid != (uid_t) -1 && euid != old_ruid))
571                 current->suid = current->euid;
572         current->fsuid = current->euid;
573 
574         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
575                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
576         }
577 
578         return 0;
579 }
580 
581 
582                 
583 /*
584  * setuid() is implemented like SysV with SAVED_IDS 
585  * 
586  * Note that SAVED_ID's is deficient in that a setuid root program
587  * like sendmail, for example, cannot set its uid to be a normal 
588  * user and then switch back, because if you're root, setuid() sets
589  * the saved uid too.  If you don't like this, blame the bright people
590  * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
591  * will allow a root program to temporarily drop privileges and be able to
592  * regain them by swapping the real and effective uid.  
593  */
594 asmlinkage long sys_setuid(uid_t uid)
595 {
596         int old_euid = current->euid;
597         int old_ruid, old_suid, new_ruid, new_suid;
598 
599         old_ruid = new_ruid = current->uid;
600         old_suid = current->suid;
601         new_suid = old_suid;
602         
603         if (capable(CAP_SETUID)) {
604                 if (uid != old_ruid && set_user(uid, old_euid != uid) < 0)
605                         return -EAGAIN;
606                 new_suid = uid;
607         } else if ((uid != current->uid) && (uid != new_suid))
608                 return -EPERM;
609 
610         if (old_euid != uid)
611         {
612                 current->mm->dumpable = 0;
613                 wmb();
614         }
615         current->fsuid = current->euid = uid;
616         current->suid = new_suid;
617 
618         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
619                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
620         }
621 
622         return 0;
623 }
624 
625 
626 /*
627  * This function implements a generic ability to update ruid, euid,
628  * and suid.  This allows you to implement the 4.4 compatible seteuid().
629  */
630 asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
631 {
632         int old_ruid = current->uid;
633         int old_euid = current->euid;
634         int old_suid = current->suid;
635 
636         if (!capable(CAP_SETUID)) {
637                 if ((ruid != (uid_t) -1) && (ruid != current->uid) &&
638                     (ruid != current->euid) && (ruid != current->suid))
639                         return -EPERM;
640                 if ((euid != (uid_t) -1) && (euid != current->uid) &&
641                     (euid != current->euid) && (euid != current->suid))
642                         return -EPERM;
643                 if ((suid != (uid_t) -1) && (suid != current->uid) &&
644                     (suid != current->euid) && (suid != current->suid))
645                         return -EPERM;
646         }
647         if (ruid != (uid_t) -1) {
648                 if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0)
649                         return -EAGAIN;
650         }
651         if (euid != (uid_t) -1) {
652                 if (euid != current->euid)
653                 {
654                         current->mm->dumpable = 0;
655                         wmb();
656                 }
657                 current->euid = euid;
658                 current->fsuid = euid;
659         }
660         if (suid != (uid_t) -1)
661                 current->suid = suid;
662 
663         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
664                 cap_emulate_setxuid(old_ruid, old_euid, old_suid);
665         }
666 
667         return 0;
668 }
669 
670 asmlinkage long sys_getresuid(uid_t *ruid, uid_t *euid, uid_t *suid)
671 {
672         int retval;
673 
674         if (!(retval = put_user(current->uid, ruid)) &&
675             !(retval = put_user(current->euid, euid)))
676                 retval = put_user(current->suid, suid);
677 
678         return retval;
679 }
680 
681 /*
682  * Same as above, but for rgid, egid, sgid.
683  */
684 asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
685 {
686         if (!capable(CAP_SETGID)) {
687                 if ((rgid != (gid_t) -1) && (rgid != current->gid) &&
688                     (rgid != current->egid) && (rgid != current->sgid))
689                         return -EPERM;
690                 if ((egid != (gid_t) -1) && (egid != current->gid) &&
691                     (egid != current->egid) && (egid != current->sgid))
692                         return -EPERM;
693                 if ((sgid != (gid_t) -1) && (sgid != current->gid) &&
694                     (sgid != current->egid) && (sgid != current->sgid))
695                         return -EPERM;
696         }
697         if (egid != (gid_t) -1) {
698                 if (egid != current->egid)
699                 {
700                         current->mm->dumpable = 0;
701                         wmb();
702                 }
703                 current->egid = egid;
704                 current->fsgid = egid;
705         }
706         if (rgid != (gid_t) -1)
707                 current->gid = rgid;
708         if (sgid != (gid_t) -1)
709                 current->sgid = sgid;
710         return 0;
711 }
712 
713 asmlinkage long sys_getresgid(gid_t *rgid, gid_t *egid, gid_t *sgid)
714 {
715         int retval;
716 
717         if (!(retval = put_user(current->gid, rgid)) &&
718             !(retval = put_user(current->egid, egid)))
719                 retval = put_user(current->sgid, sgid);
720 
721         return retval;
722 }
723 
724 
725 /*
726  * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
727  * is used for "access()" and for the NFS daemon (letting nfsd stay at
728  * whatever uid it wants to). It normally shadows "euid", except when
729  * explicitly set by setfsuid() or for access..
730  */
731 asmlinkage long sys_setfsuid(uid_t uid)
732 {
733         int old_fsuid;
734 
735         old_fsuid = current->fsuid;
736         if (uid == current->uid || uid == current->euid ||
737             uid == current->suid || uid == current->fsuid || 
738             capable(CAP_SETUID))
739         {
740                 if (uid != old_fsuid)
741                 {
742                         current->mm->dumpable = 0;
743                         wmb();
744                 }
745                 current->fsuid = uid;
746         }
747 
748         /* We emulate fsuid by essentially doing a scaled-down version
749          * of what we did in setresuid and friends. However, we only
750          * operate on the fs-specific bits of the process' effective
751          * capabilities 
752          *
753          * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
754          *          if not, we might be a bit too harsh here.
755          */
756         
757         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
758                 if (old_fsuid == 0 && current->fsuid != 0) {
759                         cap_t(current->cap_effective) &= ~CAP_FS_MASK;
760                 }
761                 if (old_fsuid != 0 && current->fsuid == 0) {
762                         cap_t(current->cap_effective) |=
763                                 (cap_t(current->cap_permitted) & CAP_FS_MASK);
764                 }
765         }
766 
767         return old_fsuid;
768 }
769 
770 /*
771  * Samma på svenska..
772  */
773 asmlinkage long sys_setfsgid(gid_t gid)
774 {
775         int old_fsgid;
776 
777         old_fsgid = current->fsgid;
778         if (gid == current->gid || gid == current->egid ||
779             gid == current->sgid || gid == current->fsgid || 
780             capable(CAP_SETGID))
781         {
782                 if (gid != old_fsgid)
783                 {
784                         current->mm->dumpable = 0;
785                         wmb();
786                 }
787                 current->fsgid = gid;
788         }
789         return old_fsgid;
790 }
791 
792 asmlinkage long sys_times(struct tms * tbuf)
793 {
794         /*
795          *      In the SMP world we might just be unlucky and have one of
796          *      the times increment as we use it. Since the value is an
797          *      atomically safe type this is just fine. Conceptually its
798          *      as if the syscall took an instant longer to occur.
799          */
800         if (tbuf)
801                 if (copy_to_user(tbuf, &current->times, sizeof(struct tms)))
802                         return -EFAULT;
803         return jiffies;
804 }
805 
806 /*
807  * This needs some heavy checking ...
808  * I just haven't the stomach for it. I also don't fully
809  * understand sessions/pgrp etc. Let somebody who does explain it.
810  *
811  * OK, I think I have the protection semantics right.... this is really
812  * only important on a multi-user system anyway, to make sure one user
813  * can't send a signal to a process owned by another.  -TYT, 12/12/91
814  *
815  * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
816  * LBT 04.03.94
817  */
818 
819 asmlinkage long sys_setpgid(pid_t pid, pid_t pgid)
820 {
821         struct task_struct * p;
822         int err = -EINVAL;
823 
824         if (!pid)
825                 pid = current->pid;
826         if (!pgid)
827                 pgid = pid;
828         if (pgid < 0)
829                 return -EINVAL;
830 
831         /* From this point forward we keep holding onto the tasklist lock
832          * so that our parent does not change from under us. -DaveM
833          */
834         read_lock(&tasklist_lock);
835 
836         err = -ESRCH;
837         p = find_task_by_pid(pid);
838         if (!p)
839                 goto out;
840 
841         if (p->p_pptr == current || p->p_opptr == current) {
842                 err = -EPERM;
843                 if (p->session != current->session)
844                         goto out;
845                 err = -EACCES;
846                 if (p->did_exec)
847                         goto out;
848         } else if (p != current)
849                 goto out;
850         err = -EPERM;
851         if (p->leader)
852                 goto out;
853         if (pgid != pid) {
854                 struct task_struct * tmp;
855                 for_each_task (tmp) {
856                         if (tmp->pgrp == pgid &&
857                             tmp->session == current->session)
858                                 goto ok_pgid;
859                 }
860                 goto out;
861         }
862 
863 ok_pgid:
864         p->pgrp = pgid;
865         err = 0;
866 out:
867         /* All paths lead to here, thus we are safe. -DaveM */
868         read_unlock(&tasklist_lock);
869         return err;
870 }
871 
872 asmlinkage long sys_getpgid(pid_t pid)
873 {
874         if (!pid) {
875                 return current->pgrp;
876         } else {
877                 int retval;
878                 struct task_struct *p;
879 
880                 read_lock(&tasklist_lock);
881                 p = find_task_by_pid(pid);
882 
883                 retval = -ESRCH;
884                 if (p)
885                         retval = p->pgrp;
886                 read_unlock(&tasklist_lock);
887                 return retval;
888         }
889 }
890 
891 asmlinkage long sys_getpgrp(void)
892 {
893         /* SMP - assuming writes are word atomic this is fine */
894         return current->pgrp;
895 }
896 
897 asmlinkage long sys_getsid(pid_t pid)
898 {
899         if (!pid) {
900                 return current->session;
901         } else {
902                 int retval;
903                 struct task_struct *p;
904 
905                 read_lock(&tasklist_lock);
906                 p = find_task_by_pid(pid);
907 
908                 retval = -ESRCH;
909                 if(p)
910                         retval = p->session;
911                 read_unlock(&tasklist_lock);
912                 return retval;
913         }
914 }
915 
916 asmlinkage long sys_setsid(void)
917 {
918         struct task_struct * p;
919         int err = -EPERM;
920 
921         read_lock(&tasklist_lock);
922         for_each_task(p) {
923                 if (p->pgrp == current->pid)
924                         goto out;
925         }
926 
927         current->leader = 1;
928         current->session = current->pgrp = current->pid;
929         current->tty = NULL;
930         current->tty_old_pgrp = 0;
931         err = current->pgrp;
932 out:
933         read_unlock(&tasklist_lock);
934         return err;
935 }
936 
937 /*
938  * Supplementary group IDs
939  */
940 asmlinkage long sys_getgroups(int gidsetsize, gid_t *grouplist)
941 {
942         int i;
943         
944         /*
945          *      SMP: Nobody else can change our grouplist. Thus we are
946          *      safe.
947          */
948 
949         if (gidsetsize < 0)
950                 return -EINVAL;
951         i = current->ngroups;
952         if (gidsetsize) {
953                 if (i > gidsetsize)
954                         return -EINVAL;
955                 if (copy_to_user(grouplist, current->groups, sizeof(gid_t)*i))
956                         return -EFAULT;
957         }
958         return i;
959 }
960 
961 /*
962  *      SMP: Our groups are not shared. We can copy to/from them safely
963  *      without another task interfering.
964  */
965  
966 asmlinkage long sys_setgroups(int gidsetsize, gid_t *grouplist)
967 {
968         if (!capable(CAP_SETGID))
969                 return -EPERM;
970         if ((unsigned) gidsetsize > NGROUPS)
971                 return -EINVAL;
972         if(copy_from_user(current->groups, grouplist, gidsetsize * sizeof(gid_t)))
973                 return -EFAULT;
974         current->ngroups = gidsetsize;
975         return 0;
976 }
977 
978 static int supplemental_group_member(gid_t grp)
979 {
980         int i = current->ngroups;
981 
982         if (i) {
983                 gid_t *groups = current->groups;
984                 do {
985                         if (*groups == grp)
986                                 return 1;
987                         groups++;
988                         i--;
989                 } while (i);
990         }
991         return 0;
992 }
993 
994 /*
995  * Check whether we're fsgid/egid or in the supplemental group..
996  */
997 int in_group_p(gid_t grp)
998 {
999         int retval = 1;
1000         if (grp != current->fsgid)
1001                 retval = supplemental_group_member(grp);
1002         return retval;
1003 }
1004 
1005 int in_egroup_p(gid_t grp)
1006 {
1007         int retval = 1;
1008         if (grp != current->egid)
1009                 retval = supplemental_group_member(grp);
1010         return retval;
1011 }
1012 
/*
 * Read/write semaphore taken by the uname, hostname and domainname
 * system calls in this file around their accesses to system_utsname:
 * down_read() for readers, down_write() for the setters.
 */
DECLARE_RWSEM(uts_sem);
1014 
1015 asmlinkage long sys_newuname(struct new_utsname * name)
1016 {
1017         int errno = 0;
1018 
1019         down_read(&uts_sem);
1020         if (copy_to_user(name,&system_utsname,sizeof *name))
1021                 errno = -EFAULT;
1022         up_read(&uts_sem);
1023         return errno;
1024 }
1025 
1026 asmlinkage long sys_sethostname(char *name, int len)
1027 {
1028         int errno;
1029 
1030         if (!capable(CAP_SYS_ADMIN))
1031                 return -EPERM;
1032         if (len < 0 || len > __NEW_UTS_LEN)
1033                 return -EINVAL;
1034         down_write(&uts_sem);
1035         errno = -EFAULT;
1036         if (!copy_from_user(system_utsname.nodename, name, len)) {
1037                 system_utsname.nodename[len] = 0;
1038                 errno = 0;
1039         }
1040         up_write(&uts_sem);
1041         return errno;
1042 }
1043 
1044 asmlinkage long sys_gethostname(char *name, int len)
1045 {
1046         int i, errno;
1047 
1048         if (len < 0)
1049                 return -EINVAL;
1050         down_read(&uts_sem);
1051         i = 1 + strlen(system_utsname.nodename);
1052         if (i > len)
1053                 i = len;
1054         errno = 0;
1055         if (copy_to_user(name, system_utsname.nodename, i))
1056                 errno = -EFAULT;
1057         up_read(&uts_sem);
1058         return errno;
1059 }
1060 
1061 /*
1062  * Only setdomainname; getdomainname can be implemented by calling
1063  * uname()
1064  */
1065 asmlinkage long sys_setdomainname(char *name, int len)
1066 {
1067         int errno;
1068 
1069         if (!capable(CAP_SYS_ADMIN))
1070                 return -EPERM;
1071         if (len < 0 || len > __NEW_UTS_LEN)
1072                 return -EINVAL;
1073 
1074         down_write(&uts_sem);
1075         errno = -EFAULT;
1076         if (!copy_from_user(system_utsname.domainname, name, len)) {
1077                 errno = 0;
1078                 system_utsname.domainname[len] = 0;
1079         }
1080         up_write(&uts_sem);
1081         return errno;
1082 }
1083 
1084 asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit *rlim)
1085 {
1086         if (resource >= RLIM_NLIMITS)
1087                 return -EINVAL;
1088         else
1089                 return copy_to_user(rlim, current->rlim + resource, sizeof(*rlim))
1090                         ? -EFAULT : 0;
1091 }
1092 
1093 #if !defined(__ia64__) 
1094 
1095 /*
1096  *      Back compatibility for getrlimit. Needed for some apps.
1097  */
1098  
1099 asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit *rlim)
1100 {
1101         struct rlimit x;
1102         if (resource >= RLIM_NLIMITS)
1103                 return -EINVAL;
1104 
1105         memcpy(&x, current->rlim + resource, sizeof(*rlim));
1106         if(x.rlim_cur > 0x7FFFFFFF)
1107                 x.rlim_cur = 0x7FFFFFFF;
1108         if(x.rlim_max > 0x7FFFFFFF)
1109                 x.rlim_max = 0x7FFFFFFF;
1110         return copy_to_user(rlim, &x, sizeof(x))?-EFAULT:0;
1111 }
1112 
1113 #endif
1114 
1115 asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit *rlim)
1116 {
1117         struct rlimit new_rlim, *old_rlim;
1118 
1119         if (resource >= RLIM_NLIMITS)
1120                 return -EINVAL;
1121         if(copy_from_user(&new_rlim, rlim, sizeof(*rlim)))
1122                 return -EFAULT;
1123         old_rlim = current->rlim + resource;
1124         if (((new_rlim.rlim_cur > old_rlim->rlim_max) ||
1125              (new_rlim.rlim_max > old_rlim->rlim_max)) &&
1126             !capable(CAP_SYS_RESOURCE))
1127                 return -EPERM;
1128         if (resource == RLIMIT_NOFILE) {
1129                 if (new_rlim.rlim_cur > NR_OPEN || new_rlim.rlim_max > NR_OPEN)
1130                         return -EPERM;
1131         }
1132         *old_rlim = new_rlim;
1133         return 0;
1134 }
1135 
1136 /*
1137  * It would make sense to put struct rusage in the task_struct,
1138  * except that would make the task_struct be *really big*.  After
1139  * task_struct gets moved into malloc'ed memory, it would
1140  * make sense to do this.  It will make moving the rest of the information
1141  * a lot simpler!  (Which we're not doing right now because we're not
1142  * measuring them yet).
1143  *
1144  * This is SMP safe.  Either we are called from sys_getrusage on ourselves
1145  * below (we know we aren't going to exit/disappear and only we change our
1146  * rusage counters), or we are called from wait4() on a process which is
1147  * either stopped or zombied.  In the zombied case the task won't get
1148  * reaped till shortly after the call to getrusage(), in both cases the
1149  * task being examined is in a frozen state so the counters won't change.
1150  *
1151  * FIXME! Get the fault counts properly!
1152  */
1153 int getrusage(struct task_struct *p, int who, struct rusage *ru)
1154 {
1155         struct rusage r;
1156 
1157         memset((char *) &r, 0, sizeof(r));
1158         switch (who) {
1159                 case RUSAGE_SELF:
1160                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime);
1161                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime);
1162                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime);
1163                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime);
1164                         r.ru_minflt = p->min_flt;
1165                         r.ru_majflt = p->maj_flt;
1166                         r.ru_nswap = p->nswap;
1167                         break;
1168                 case RUSAGE_CHILDREN:
1169                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_cutime);
1170                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_cutime);
1171                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_cstime);
1172                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_cstime);
1173                         r.ru_minflt = p->cmin_flt;
1174                         r.ru_majflt = p->cmaj_flt;
1175                         r.ru_nswap = p->cnswap;
1176                         break;
1177                 default:
1178                         r.ru_utime.tv_sec = CT_TO_SECS(p->times.tms_utime + p->times.tms_cutime);
1179                         r.ru_utime.tv_usec = CT_TO_USECS(p->times.tms_utime + p->times.tms_cutime);
1180                         r.ru_stime.tv_sec = CT_TO_SECS(p->times.tms_stime + p->times.tms_cstime);
1181                         r.ru_stime.tv_usec = CT_TO_USECS(p->times.tms_stime + p->times.tms_cstime);
1182                         r.ru_minflt = p->min_flt + p->cmin_flt;
1183                         r.ru_majflt = p->maj_flt + p->cmaj_flt;
1184                         r.ru_nswap = p->nswap + p->cnswap;
1185                         break;
1186         }
1187         return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0;
1188 }
1189 
1190 asmlinkage long sys_getrusage(int who, struct rusage *ru)
1191 {
1192         if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN)
1193                 return -EINVAL;
1194         return getrusage(current, who, ru);
1195 }
1196 
1197 asmlinkage long sys_umask(int mask)
1198 {
1199         mask = xchg(&current->fs->umask, mask & S_IRWXUGO);
1200         return mask;
1201 }
1202     
1203 asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
1204                           unsigned long arg4, unsigned long arg5)
1205 {
1206         int error = 0;
1207         int sig;
1208 
1209         switch (option) {
1210                 case PR_SET_PDEATHSIG:
1211                         sig = arg2;
1212                         if (sig < 0 || sig > _NSIG) {
1213                                 error = -EINVAL;
1214                                 break;
1215                         }
1216                         current->pdeath_signal = sig;
1217                         break;
1218                 case PR_GET_PDEATHSIG:
1219                         error = put_user(current->pdeath_signal, (int *)arg2);
1220                         break;
1221                 case PR_GET_DUMPABLE:
1222                         if (current->mm->dumpable)
1223                                 error = 1;
1224                         break;
1225                 case PR_SET_DUMPABLE:
1226                         if (arg2 != 0 && arg2 != 1) {
1227                                 error = -EINVAL;
1228                                 break;
1229                         }
1230                         current->mm->dumpable = arg2;
1231                         break;
1232                 case PR_SET_UNALIGN:
1233 #ifdef SET_UNALIGN_CTL
1234                         error = SET_UNALIGN_CTL(current, arg2);
1235 #else
1236                         error = -EINVAL;
1237 #endif
1238                         break;
1239 
1240                 case PR_GET_UNALIGN:
1241 #ifdef GET_UNALIGN_CTL
1242                         error = GET_UNALIGN_CTL(current, arg2);
1243 #else
1244                         error = -EINVAL;
1245 #endif
1246                         break;
1247 
1248                 case PR_SET_FPEMU:
1249 #ifdef SET_FPEMU_CTL
1250                         error = SET_FPEMU_CTL(current, arg2);
1251 #else
1252                         error = -EINVAL;
1253 #endif
1254                         break;
1255 
1256                 case PR_GET_FPEMU:
1257 #ifdef GET_FPEMU_CTL
1258                         error = GET_FPEMU_CTL(current, arg2);
1259 #else
1260                         error = -EINVAL;
1261 #endif
1262                         break;
1263 
1264                 case PR_GET_KEEPCAPS:
1265                         if (current->keep_capabilities)
1266                                 error = 1;
1267                         break;
1268                 case PR_SET_KEEPCAPS:
1269                         if (arg2 != 0 && arg2 != 1) {
1270                                 error = -EINVAL;
1271                                 break;
1272                         }
1273                         current->keep_capabilities = arg2;
1274                         break;
1275                 default:
1276                         error = -EINVAL;
1277                         break;
1278         }
1279         return error;
1280 }
1281 
/*
 * Symbols from this file that are exported via EXPORT_SYMBOL():
 * the notifier chain helpers, the reboot notifier registration
 * functions and the group membership checks.
 */
EXPORT_SYMBOL(notifier_chain_register);
EXPORT_SYMBOL(notifier_chain_unregister);
EXPORT_SYMBOL(notifier_call_chain);
EXPORT_SYMBOL(register_reboot_notifier);
EXPORT_SYMBOL(unregister_reboot_notifier);
EXPORT_SYMBOL(in_group_p);
EXPORT_SYMBOL(in_egroup_p);
1289