Magellan Linux

Annotation of /alx-src/tags/kernel26-2.6.12-alx-r9/kernel/timer.c



Revision 630
Wed Mar 4 11:03:09 2009 UTC by niro
File MIME type: text/plain
File size: 42801 byte(s)
Tag kernel26-2.6.12-alx-r9
1 niro 628 /*
2     * linux/kernel/timer.c
3     *
4     * Kernel internal timers, kernel timekeeping, basic process system calls
5     *
6     * Copyright (C) 1991, 1992 Linus Torvalds
7     *
8     * 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
9     *
10     * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
11     * "A Kernel Model for Precision Timekeeping" by Dave Mills
12     * 1998-12-24 Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
13     * serialize accesses to xtime/lost_ticks).
14     * Copyright (C) 1998 Andrea Arcangeli
15     * 1999-03-10 Improved NTP compatibility by Ulrich Windl
16     * 2002-05-31 Move sys_sysinfo here and make its locking sane, Robert Love
17     * 2000-10-05 Implemented scalable SMP per-CPU timer handling.
18     * Copyright (C) 2000, 2001, 2002 Ingo Molnar
19     * Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
20     */
21    
22     #include <linux/kernel_stat.h>
23     #include <linux/module.h>
24     #include <linux/interrupt.h>
25     #include <linux/percpu.h>
26     #include <linux/init.h>
27     #include <linux/mm.h>
28     #include <linux/swap.h>
29     #include <linux/notifier.h>
30     #include <linux/thread_info.h>
31     #include <linux/time.h>
32     #include <linux/jiffies.h>
33     #include <linux/posix-timers.h>
34     #include <linux/cpu.h>
35     #include <linux/syscalls.h>
36    
37     #include <asm/uaccess.h>
38     #include <asm/unistd.h>
39     #include <asm/div64.h>
40     #include <asm/timex.h>
41     #include <asm/io.h>
42    
43     #ifdef CONFIG_TIME_INTERPOLATION
44     static void time_interpolator_update(long delta_nsec);
45     #else
46     #define time_interpolator_update(x)
47     #endif
48    
49     /*
50     * per-CPU timer vector definitions:
51     */
52    
53     #define TVN_BITS (CONFIG_BASE_SMALL ? 4 : 6)
54     #define TVR_BITS (CONFIG_BASE_SMALL ? 6 : 8)
55     #define TVN_SIZE (1 << TVN_BITS)
56     #define TVR_SIZE (1 << TVR_BITS)
57     #define TVN_MASK (TVN_SIZE - 1)
58     #define TVR_MASK (TVR_SIZE - 1)
59    
60     typedef struct tvec_s {
61     struct list_head vec[TVN_SIZE];
62     } tvec_t;
63    
64     typedef struct tvec_root_s {
65     struct list_head vec[TVR_SIZE];
66     } tvec_root_t;
67    
68     struct tvec_t_base_s {
69     spinlock_t lock;
70     unsigned long timer_jiffies;
71     struct timer_list *running_timer;
72     tvec_root_t tv1;
73     tvec_t tv2;
74     tvec_t tv3;
75     tvec_t tv4;
76     tvec_t tv5;
77     } ____cacheline_aligned_in_smp;
78    
79     typedef struct tvec_t_base_s tvec_base_t;
80    
81     static inline void set_running_timer(tvec_base_t *base,
82     struct timer_list *timer)
83     {
84     #ifdef CONFIG_SMP
85     base->running_timer = timer;
86     #endif
87     }
88    
89     /* Fake initialization */
90     static DEFINE_PER_CPU(tvec_base_t, tvec_bases) = { SPIN_LOCK_UNLOCKED };
91    
92     static void check_timer_failed(struct timer_list *timer)
93     {
94     static int whine_count;
95     if (whine_count < 16) {
96     whine_count++;
97     printk("Uninitialised timer!\n");
98     printk("This is just a warning. Your computer is OK\n");
99     printk("function=0x%p, data=0x%lx\n",
100     timer->function, timer->data);
101     dump_stack();
102     }
103     /*
104     * Now fix it up
105     */
106     spin_lock_init(&timer->lock);
107     timer->magic = TIMER_MAGIC;
108     }
109    
110     static inline void check_timer(struct timer_list *timer)
111     {
112     if (timer->magic != TIMER_MAGIC)
113     check_timer_failed(timer);
114     }
115    
116    
117     static void internal_add_timer(tvec_base_t *base, struct timer_list *timer)
118     {
119     unsigned long expires = timer->expires;
120     unsigned long idx = expires - base->timer_jiffies;
121     struct list_head *vec;
122    
123     if (idx < TVR_SIZE) {
124     int i = expires & TVR_MASK;
125     vec = base->tv1.vec + i;
126     } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
127     int i = (expires >> TVR_BITS) & TVN_MASK;
128     vec = base->tv2.vec + i;
129     } else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
130     int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
131     vec = base->tv3.vec + i;
132     } else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
133     int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
134     vec = base->tv4.vec + i;
135     } else if ((signed long) idx < 0) {
136     /*
137     * Can happen if you add a timer with expires == jiffies,
138     * or you set a timer to go off in the past
139     */
140     vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
141     } else {
142     int i;
143     /* If the timeout is larger than 0xffffffff on 64-bit
144     * architectures then we use the maximum timeout:
145     */
146     if (idx > 0xffffffffUL) {
147     idx = 0xffffffffUL;
148     expires = idx + base->timer_jiffies;
149     }
150     i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
151     vec = base->tv5.vec + i;
152     }
153     /*
154     * Timers are FIFO:
155     */
156     list_add_tail(&timer->entry, vec);
157     }
158    
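internal_add_timer() above spreads timers over five vectors by distance to expiry: with CONFIG_BASE_SMALL=0, tv1 has 256 single-jiffy slots, and each of tv2..tv5 adds 6 more bits of range at coarser granularity. The following standalone userspace sketch (an editorial illustration, not part of timer.c) reproduces that slot selection for a few example deltas.

#include <stdio.h>

/* Editorial sketch: mirror the vector/slot choice of internal_add_timer()
 * for CONFIG_BASE_SMALL=0, i.e. TVR_BITS=8, TVN_BITS=6.
 * "idx" is expires - timer_jiffies.
 */
#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_MASK ((1 << TVN_BITS) - 1)
#define TVR_MASK ((1 << TVR_BITS) - 1)

static void show_slot(unsigned long timer_jiffies, unsigned long expires)
{
	unsigned long idx = expires - timer_jiffies;

	if (idx < (1UL << TVR_BITS))
		printf("delta %lu -> tv1 slot %lu\n", idx, expires & TVR_MASK);
	else if (idx < 1UL << (TVR_BITS + TVN_BITS))
		printf("delta %lu -> tv2 slot %lu\n", idx,
		       (expires >> TVR_BITS) & TVN_MASK);
	else if (idx < 1UL << (TVR_BITS + 2 * TVN_BITS))
		printf("delta %lu -> tv3 slot %lu\n", idx,
		       (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK);
	else if (idx < 1UL << (TVR_BITS + 3 * TVN_BITS))
		printf("delta %lu -> tv4 slot %lu\n", idx,
		       (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK);
	else
		printf("delta %lu -> tv5 slot %lu\n", idx,
		       (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK);
}

int main(void)
{
	show_slot(1000, 1001);		/* 1 jiffy out: lands in tv1 */
	show_slot(1000, 1000 + 300);	/* ~300 jiffies: tv2 */
	show_slot(1000, 1000 + 70000);	/* ~70000 jiffies: tv3 */
	return 0;
}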
159     int __mod_timer(struct timer_list *timer, unsigned long expires)
160     {
161     tvec_base_t *old_base, *new_base;
162     unsigned long flags;
163     int ret = 0;
164    
165     BUG_ON(!timer->function);
166    
167     check_timer(timer);
168    
169     spin_lock_irqsave(&timer->lock, flags);
170     new_base = &__get_cpu_var(tvec_bases);
171     repeat:
172     old_base = timer->base;
173    
174     /*
175     * Prevent deadlocks via ordering by old_base < new_base.
176     */
177     if (old_base && (new_base != old_base)) {
178     if (old_base < new_base) {
179     spin_lock(&new_base->lock);
180     spin_lock(&old_base->lock);
181     } else {
182     spin_lock(&old_base->lock);
183     spin_lock(&new_base->lock);
184     }
185     /*
186     * The timer base might have been cancelled while we were
187     * trying to take the lock(s):
188     */
189     if (timer->base != old_base) {
190     spin_unlock(&new_base->lock);
191     spin_unlock(&old_base->lock);
192     goto repeat;
193     }
194     } else {
195     spin_lock(&new_base->lock);
196     if (timer->base != old_base) {
197     spin_unlock(&new_base->lock);
198     goto repeat;
199     }
200     }
201    
202     /*
203     * Delete the previous timeout (if there was any), and install
204     * the new one:
205     */
206     if (old_base) {
207     list_del(&timer->entry);
208     ret = 1;
209     }
210     timer->expires = expires;
211     internal_add_timer(new_base, timer);
212     timer->base = new_base;
213    
214     if (old_base && (new_base != old_base))
215     spin_unlock(&old_base->lock);
216     spin_unlock(&new_base->lock);
217     spin_unlock_irqrestore(&timer->lock, flags);
218    
219     return ret;
220     }
221    
222     EXPORT_SYMBOL(__mod_timer);
223    
224     /***
225     * add_timer_on - start a timer on a particular CPU
226     * @timer: the timer to be added
227     * @cpu: the CPU to start it on
228     *
229     * This is not very scalable on SMP. Double adds are not possible.
230     */
231     void add_timer_on(struct timer_list *timer, int cpu)
232     {
233     tvec_base_t *base = &per_cpu(tvec_bases, cpu);
234     unsigned long flags;
235    
236     BUG_ON(timer_pending(timer) || !timer->function);
237    
238     check_timer(timer);
239    
240     spin_lock_irqsave(&base->lock, flags);
241     internal_add_timer(base, timer);
242     timer->base = base;
243     spin_unlock_irqrestore(&base->lock, flags);
244     }
245    
246    
247     /***
248     * mod_timer - modify a timer's timeout
249     * @timer: the timer to be modified
250     *
251     * mod_timer is a more efficient way to update the expire field of an
252     * active timer (if the timer is inactive it will be activated)
253     *
254     * mod_timer(timer, expires) is equivalent to:
255     *
256     * del_timer(timer); timer->expires = expires; add_timer(timer);
257     *
258     * Note that if there are multiple unserialized concurrent users of the
259     * same timer, then mod_timer() is the only safe way to modify the timeout,
260     * since add_timer() cannot modify an already running timer.
261     *
262     * The function returns whether it has modified a pending timer or not.
263     * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an
264     * active timer returns 1.)
265     */
266     int mod_timer(struct timer_list *timer, unsigned long expires)
267     {
268     BUG_ON(!timer->function);
269    
270     check_timer(timer);
271    
272     /*
273     * This is a common optimization triggered by the
274     * networking code - if the timer is re-modified
275     * to be the same thing then just return:
276     */
277     if (timer->expires == expires && timer_pending(timer))
278     return 1;
279    
280     return __mod_timer(timer, expires);
281     }
282    
283     EXPORT_SYMBOL(mod_timer);
284    
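A minimal usage sketch of the API documented above (hypothetical names, not part of timer.c): initialize the timer once, then mod_timer() both arms and re-arms it, exactly as the del_timer(); ...; add_timer() equivalence in the comment describes.

#include <linux/timer.h>
#include <linux/jiffies.h>

/* Hypothetical example: arm a callback to run roughly one second
 * from now using the 2.6.12-era interface shown above.
 */
static void my_timer_fn(unsigned long data)
{
	/* runs in softirq context once the timer expires */
}

static struct timer_list my_timer;

static void my_timer_setup(void)
{
	init_timer(&my_timer);
	my_timer.function = my_timer_fn;
	my_timer.data = 0;

	/* equivalent to del_timer(); expires = jiffies + HZ; add_timer(); */
	mod_timer(&my_timer, jiffies + HZ);
}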
285     /***
286     * del_timer - deactivate a timer.
287     * @timer: the timer to be deactivated
288     *
289     * del_timer() deactivates a timer - this works on both active and inactive
290     * timers.
291     *
292     * The function returns whether it has deactivated a pending timer or not.
293     * (ie. del_timer() of an inactive timer returns 0, del_timer() of an
294     * active timer returns 1.)
295     */
296     int del_timer(struct timer_list *timer)
297     {
298     unsigned long flags;
299     tvec_base_t *base;
300    
301     check_timer(timer);
302    
303     repeat:
304     base = timer->base;
305     if (!base)
306     return 0;
307     spin_lock_irqsave(&base->lock, flags);
308     if (base != timer->base) {
309     spin_unlock_irqrestore(&base->lock, flags);
310     goto repeat;
311     }
312     list_del(&timer->entry);
313     /* Need to make sure that anybody who sees a NULL base also sees the list ops */
314     smp_wmb();
315     timer->base = NULL;
316     spin_unlock_irqrestore(&base->lock, flags);
317    
318     return 1;
319     }
320    
321     EXPORT_SYMBOL(del_timer);
322    
323     #ifdef CONFIG_SMP
324     /***
325     * del_timer_sync - deactivate a timer and wait for the handler to finish.
326     * @timer: the timer to be deactivated
327     *
328     * This function only differs from del_timer() on SMP: besides deactivating
329     * the timer it also makes sure the handler has finished executing on other
330     * CPUs.
331     *
332     * Synchronization rules: callers must prevent restarting of the timer,
333     * otherwise this function is meaningless. It must not be called from
334     * interrupt contexts. The caller must not hold locks which would prevent
335     * completion of the timer's handler. Upon exit the timer is not queued and
336     * the handler is not running on any CPU.
337     *
338     * The function returns whether it has deactivated a pending timer or not.
339     *
340     * del_timer_sync() is slow and complicated because it copes with timer
341     * handlers which re-arm the timer (periodic timers). If the timer handler
342     * is known to not do this (a single shot timer) then use
343     * del_singleshot_timer_sync() instead.
344     */
345     int del_timer_sync(struct timer_list *timer)
346     {
347     tvec_base_t *base;
348     int i, ret = 0;
349    
350     check_timer(timer);
351    
352     del_again:
353     ret += del_timer(timer);
354    
355     for_each_online_cpu(i) {
356     base = &per_cpu(tvec_bases, i);
357     if (base->running_timer == timer) {
358     while (base->running_timer == timer) {
359     cpu_relax();
360     preempt_check_resched();
361     }
362     break;
363     }
364     }
365     smp_rmb();
366     if (timer_pending(timer))
367     goto del_again;
368    
369     return ret;
370     }
371     EXPORT_SYMBOL(del_timer_sync);
372    
373     /***
374     * del_singleshot_timer_sync - deactivate a non-recursive timer
375     * @timer: the timer to be deactivated
376     *
377     * This function is an optimization of del_timer_sync for the case where the
378     * caller can guarantee the timer does not reschedule itself in its timer
379     * function.
380     *
381     * Synchronization rules: callers must prevent restarting of the timer,
382     * otherwise this function is meaningless. It must not be called from
383     * interrupt contexts. The caller must not hold locks which would prevent
384     * completion of the timer's handler. Upon exit the timer is not queued and
385     * the handler is not running on any CPU.
386     *
387     * The function returns whether it has deactivated a pending timer or not.
388     */
389     int del_singleshot_timer_sync(struct timer_list *timer)
390     {
391     int ret = del_timer(timer);
392    
393     if (!ret) {
394     ret = del_timer_sync(timer);
395     BUG_ON(ret);
396     }
397    
398     return ret;
399     }
400     EXPORT_SYMBOL(del_singleshot_timer_sync);
401     #endif
402    
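A hypothetical teardown sketch following the del_timer_sync() rules spelled out above (none of these names come from timer.c): first stop whatever re-arms the timer, then wait for a possibly running handler before freeing the object that embeds it.

#include <linux/timer.h>
#include <linux/slab.h>

struct my_device {			/* hypothetical container */
	int shutting_down;
	struct timer_list poll_timer;
};

static void my_device_shutdown(struct my_device *dev)
{
	/* the (hypothetical) handler checks this flag and does not
	 * call mod_timer() again once it is set */
	dev->shutting_down = 1;

	/* waits for a running handler; must not be called from
	 * interrupt context or while holding locks the handler needs */
	del_timer_sync(&dev->poll_timer);

	kfree(dev);			/* now safe: the timer is idle */
}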
403     static int cascade(tvec_base_t *base, tvec_t *tv, int index)
404     {
405     /* cascade all the timers from tv up one level */
406     struct list_head *head, *curr;
407    
408     head = tv->vec + index;
409     curr = head->next;
410     /*
411     * We are removing _all_ timers from the list, so we don't have to
412     * detach them individually, just clear the list afterwards.
413     */
414     while (curr != head) {
415     struct timer_list *tmp;
416    
417     tmp = list_entry(curr, struct timer_list, entry);
418     BUG_ON(tmp->base != base);
419     curr = curr->next;
420     internal_add_timer(base, tmp);
421     }
422     INIT_LIST_HEAD(head);
423    
424     return index;
425     }
426    
427     /***
428     * __run_timers - run all expired timers (if any) on this CPU.
429     * @base: the timer vector to be processed.
430     *
431     * This function cascades all vectors and executes all expired timer
432     * vectors.
433     */
434     #define INDEX(N) (base->timer_jiffies >> (TVR_BITS + N * TVN_BITS)) & TVN_MASK
435    
436     static inline void __run_timers(tvec_base_t *base)
437     {
438     struct timer_list *timer;
439    
440     spin_lock_irq(&base->lock);
441     while (time_after_eq(jiffies, base->timer_jiffies)) {
442     struct list_head work_list = LIST_HEAD_INIT(work_list);
443     struct list_head *head = &work_list;
444     int index = base->timer_jiffies & TVR_MASK;
445    
446     /*
447     * Cascade timers:
448     */
449     if (!index &&
450     (!cascade(base, &base->tv2, INDEX(0))) &&
451     (!cascade(base, &base->tv3, INDEX(1))) &&
452     !cascade(base, &base->tv4, INDEX(2)))
453     cascade(base, &base->tv5, INDEX(3));
454     ++base->timer_jiffies;
455     list_splice_init(base->tv1.vec + index, &work_list);
456     repeat:
457     if (!list_empty(head)) {
458     void (*fn)(unsigned long);
459     unsigned long data;
460    
461     timer = list_entry(head->next,struct timer_list,entry);
462     fn = timer->function;
463     data = timer->data;
464    
465     list_del(&timer->entry);
466     set_running_timer(base, timer);
467     smp_wmb();
468     timer->base = NULL;
469     spin_unlock_irq(&base->lock);
470     {
471     u32 preempt_count = preempt_count();
472     fn(data);
473     if (preempt_count != preempt_count()) {
474     printk("huh, entered %p with %08x, exited with %08x?\n", fn, preempt_count, preempt_count());
475     BUG();
476     }
477     }
478     spin_lock_irq(&base->lock);
479     goto repeat;
480     }
481     }
482     set_running_timer(base, NULL);
483     spin_unlock_irq(&base->lock);
484     }
485    
486     #ifdef CONFIG_NO_IDLE_HZ
487     /*
488     * Find out when the next timer event is due to happen. This
489     * is used on S/390 to stop all activity when a cpu is idle.
490     * This function needs to be called with interrupts disabled.
491     */
492     unsigned long next_timer_interrupt(void)
493     {
494     tvec_base_t *base;
495     struct list_head *list;
496     struct timer_list *nte;
497     unsigned long expires;
498     tvec_t *varray[4];
499     int i, j;
500    
501     base = &__get_cpu_var(tvec_bases);
502     spin_lock(&base->lock);
503     expires = base->timer_jiffies + (LONG_MAX >> 1);
504     list = 0;
505    
506     /* Look for timer events in tv1. */
507     j = base->timer_jiffies & TVR_MASK;
508     do {
509     list_for_each_entry(nte, base->tv1.vec + j, entry) {
510     expires = nte->expires;
511     if (j < (base->timer_jiffies & TVR_MASK))
512     list = base->tv2.vec + (INDEX(0));
513     goto found;
514     }
515     j = (j + 1) & TVR_MASK;
516     } while (j != (base->timer_jiffies & TVR_MASK));
517    
518     /* Check tv2-tv5. */
519     varray[0] = &base->tv2;
520     varray[1] = &base->tv3;
521     varray[2] = &base->tv4;
522     varray[3] = &base->tv5;
523     for (i = 0; i < 4; i++) {
524     j = INDEX(i);
525     do {
526     if (list_empty(varray[i]->vec + j)) {
527     j = (j + 1) & TVN_MASK;
528     continue;
529     }
530     list_for_each_entry(nte, varray[i]->vec + j, entry)
531     if (time_before(nte->expires, expires))
532     expires = nte->expires;
533     if (j < (INDEX(i)) && i < 3)
534     list = varray[i + 1]->vec + (INDEX(i + 1));
535     goto found;
536     } while (j != (INDEX(i)));
537     }
538     found:
539     if (list) {
540     /*
541     * The search wrapped. We need to look at the next list
542     * from the next tv element that would cascade into the tv element
543     * where we found the timer element.
544     */
545     list_for_each_entry(nte, list, entry) {
546     if (time_before(nte->expires, expires))
547     expires = nte->expires;
548     }
549     }
550     spin_unlock(&base->lock);
551     return expires;
552     }
553     #endif
554    
555     /******************************************************************/
556    
557     /*
558     * Timekeeping variables
559     */
560     unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */
561     unsigned long tick_nsec = TICK_NSEC; /* ACTHZ period (nsec) */
562    
563     /*
564     * The current time
565     * wall_to_monotonic is what we need to add to xtime (or xtime corrected
566     * for sub jiffie times) to get to monotonic time. Monotonic is pegged
567     * at zero at system boot time, so wall_to_monotonic will be negative,
568     * however, we will ALWAYS keep the tv_nsec part positive so we can use
569     * the usual normalization.
570     */
571     struct timespec xtime __attribute__ ((aligned (16)));
572     struct timespec wall_to_monotonic __attribute__ ((aligned (16)));
573    
574     EXPORT_SYMBOL(xtime);
575    
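As the comment above says, monotonic time is xtime plus wall_to_monotonic with tv_nsec kept in range; the same normalization appears again in sys_sysinfo() further down. A small userspace illustration of that carry (not part of timer.c):

#include <stdio.h>
#include <time.h>

/* Editorial illustration: add two timespecs whose tv_nsec fields are
 * each in [0, 1e9) and normalize the result, as the monotonic-time
 * computation described above does.
 */
static struct timespec timespec_add_normalize(struct timespec a, struct timespec b)
{
	struct timespec r;

	r.tv_sec = a.tv_sec + b.tv_sec;
	r.tv_nsec = a.tv_nsec + b.tv_nsec;
	if (r.tv_nsec >= 1000000000L) {		/* at most one carry needed */
		r.tv_nsec -= 1000000000L;
		r.tv_sec++;
	}
	return r;
}

int main(void)
{
	struct timespec xtime_now = { 1000, 900000000L };
	struct timespec wall_to_mono = { -300, 250000000L };
	struct timespec mono = timespec_add_normalize(xtime_now, wall_to_mono);

	/* expect 701.150000000: the nanoseconds carry one second */
	printf("%ld.%09ld\n", (long)mono.tv_sec, mono.tv_nsec);
	return 0;
}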
576     /* Don't completely fail for HZ > 500. */
577     int tickadj = 500/HZ ? : 1; /* microsecs */
578    
579    
580     /*
581     * phase-lock loop variables
582     */
583     /* TIME_ERROR prevents overwriting the CMOS clock */
584     int time_state = TIME_OK; /* clock synchronization status */
585     int time_status = STA_UNSYNC; /* clock status bits */
586     long time_offset; /* time adjustment (us) */
587     long time_constant = 2; /* pll time constant */
588     long time_tolerance = MAXFREQ; /* frequency tolerance (ppm) */
589     long time_precision = 1; /* clock precision (us) */
590     long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */
591     long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */
592     static long time_phase; /* phase offset (scaled us) */
593     long time_freq = (((NSEC_PER_SEC + HZ/2) % HZ - HZ/2) << SHIFT_USEC) / NSEC_PER_USEC;
594     /* frequency offset (scaled ppm)*/
595     static long time_adj; /* tick adjust (scaled 1 / HZ) */
596     long time_reftime; /* time at last adjustment (s) */
597     long time_adjust;
598     long time_next_adjust;
599    
600     /*
601     * this routine handles the overflow of the microsecond field
602     *
603     * The tricky bits of code to handle the accurate clock support
604     * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
605     * They were originally developed for SUN and DEC kernels.
606     * All the kudos should go to Dave for this stuff.
607     *
608     */
609     static void second_overflow(void)
610     {
611     long ltemp;
612    
613     /* Bump the maxerror field */
614     time_maxerror += time_tolerance >> SHIFT_USEC;
615     if ( time_maxerror > NTP_PHASE_LIMIT ) {
616     time_maxerror = NTP_PHASE_LIMIT;
617     time_status |= STA_UNSYNC;
618     }
619    
620     /*
621     * Leap second processing. If in leap-insert state at
622     * the end of the day, the system clock is set back one
623     * second; if in leap-delete state, the system clock is
624     * set ahead one second. The microtime() routine or
625     * external clock driver will insure that reported time
626     * is always monotonic. The ugly divides should be
627     * replaced.
628     */
629     switch (time_state) {
630    
631     case TIME_OK:
632     if (time_status & STA_INS)
633     time_state = TIME_INS;
634     else if (time_status & STA_DEL)
635     time_state = TIME_DEL;
636     break;
637    
638     case TIME_INS:
639     if (xtime.tv_sec % 86400 == 0) {
640     xtime.tv_sec--;
641     wall_to_monotonic.tv_sec++;
642     /* The timer interpolator will make time change gradually instead
643     * of an immediate jump by one second.
644     */
645     time_interpolator_update(-NSEC_PER_SEC);
646     time_state = TIME_OOP;
647     clock_was_set();
648     printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
649     }
650     break;
651    
652     case TIME_DEL:
653     if ((xtime.tv_sec + 1) % 86400 == 0) {
654     xtime.tv_sec++;
655     wall_to_monotonic.tv_sec--;
656     /* Use of time interpolator for a gradual change of time */
657     time_interpolator_update(NSEC_PER_SEC);
658     time_state = TIME_WAIT;
659     clock_was_set();
660     printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
661     }
662     break;
663    
664     case TIME_OOP:
665     time_state = TIME_WAIT;
666     break;
667    
668     case TIME_WAIT:
669     if (!(time_status & (STA_INS | STA_DEL)))
670     time_state = TIME_OK;
671     }
672    
673     /*
674     * Compute the phase adjustment for the next second. In
675     * PLL mode, the offset is reduced by a fixed factor
676     * times the time constant. In FLL mode the offset is
677     * used directly. In either mode, the maximum phase
678     * adjustment for each second is clamped so as to spread
679     * the adjustment over not more than the number of
680     * seconds between updates.
681     */
682     if (time_offset < 0) {
683     ltemp = -time_offset;
684     if (!(time_status & STA_FLL))
685     ltemp >>= SHIFT_KG + time_constant;
686     if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
687     ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
688     time_offset += ltemp;
689     time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
690     } else {
691     ltemp = time_offset;
692     if (!(time_status & STA_FLL))
693     ltemp >>= SHIFT_KG + time_constant;
694     if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
695     ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
696     time_offset -= ltemp;
697     time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
698     }
699    
700     /*
701     * Compute the frequency estimate and additional phase
702     * adjustment due to frequency error for the next
703     * second. When the PPS signal is engaged, gnaw on the
704     * watchdog counter and update the frequency computed by
705     * the pll and the PPS signal.
706     */
707     pps_valid++;
708     if (pps_valid == PPS_VALID) { /* PPS signal lost */
709     pps_jitter = MAXTIME;
710     pps_stabil = MAXFREQ;
711     time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
712     STA_PPSWANDER | STA_PPSERROR);
713     }
714     ltemp = time_freq + pps_freq;
715     if (ltemp < 0)
716     time_adj -= -ltemp >>
717     (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
718     else
719     time_adj += ltemp >>
720     (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
721    
722     #if HZ == 100
723     /* Compensate for (HZ==100) != (1 << SHIFT_HZ).
724     * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
725     */
726     if (time_adj < 0)
727     time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
728     else
729     time_adj += (time_adj >> 2) + (time_adj >> 5);
730     #endif
731     #if HZ == 1000
732     /* Compensate for (HZ==1000) != (1 << SHIFT_HZ).
733     * Add 1.5625% and 0.78125% to get 1023.4375; => only 0.05% error (p. 14)
734     */
735     if (time_adj < 0)
736     time_adj -= (-time_adj >> 6) + (-time_adj >> 7);
737     else
738     time_adj += (time_adj >> 6) + (time_adj >> 7);
739     #endif
740     }
741    
742     /* in the NTP reference this is called "hardclock()" */
743     static void update_wall_time_one_tick(void)
744     {
745     long time_adjust_step, delta_nsec;
746    
747     if ( (time_adjust_step = time_adjust) != 0 ) {
748     /* We are doing an adjtime thing.
749     *
750     * Prepare time_adjust_step to be within bounds.
751     * Note that a positive time_adjust means we want the clock
752     * to run faster.
753     *
754     * Limit the amount of the step to be in the range
755     * -tickadj .. +tickadj
756     */
757     if (time_adjust > tickadj)
758     time_adjust_step = tickadj;
759     else if (time_adjust < -tickadj)
760     time_adjust_step = -tickadj;
761    
762     /* Reduce by this step the amount of time left */
763     time_adjust -= time_adjust_step;
764     }
765     delta_nsec = tick_nsec + time_adjust_step * 1000;
766     /*
767     * Advance the phase, once it gets to one microsecond, then
768     * advance the tick more.
769     */
770     time_phase += time_adj;
771     if (time_phase <= -FINENSEC) {
772     long ltemp = -time_phase >> (SHIFT_SCALE - 10);
773     time_phase += ltemp << (SHIFT_SCALE - 10);
774     delta_nsec -= ltemp;
775     }
776     else if (time_phase >= FINENSEC) {
777     long ltemp = time_phase >> (SHIFT_SCALE - 10);
778     time_phase -= ltemp << (SHIFT_SCALE - 10);
779     delta_nsec += ltemp;
780     }
781     xtime.tv_nsec += delta_nsec;
782     time_interpolator_update(delta_nsec);
783    
784     /* Changes by adjtime() do not take effect till next tick. */
785     if (time_next_adjust != 0) {
786     time_adjust = time_next_adjust;
787     time_next_adjust = 0;
788     }
789     }
790    
791     /*
792     * Using a loop looks inefficient, but "ticks" is
793     * usually just one (we shouldn't be losing ticks,
794     * we're doing it this way mainly for interrupt
795     * latency reasons, not because we think we'll
796     * have lots of lost timer ticks)
797     */
798     static void update_wall_time(unsigned long ticks)
799     {
800     do {
801     ticks--;
802     update_wall_time_one_tick();
803     if (xtime.tv_nsec >= 1000000000) {
804     xtime.tv_nsec -= 1000000000;
805     xtime.tv_sec++;
806     second_overflow();
807     }
808     } while (ticks);
809     }
810    
811     /*
812     * Called from the timer interrupt handler to charge one tick to the current
813     * process. user_tick is 1 if the tick is user time, 0 for system.
814     */
815     void update_process_times(int user_tick)
816     {
817     struct task_struct *p = current;
818     int cpu = smp_processor_id();
819    
820     /* Note: this timer irq context must be accounted for as well. */
821     if (user_tick)
822     account_user_time(p, jiffies_to_cputime(1));
823     else
824     account_system_time(p, HARDIRQ_OFFSET, jiffies_to_cputime(1));
825     run_local_timers();
826     if (rcu_pending(cpu))
827     rcu_check_callbacks(cpu, user_tick);
828     scheduler_tick();
829     run_posix_cpu_timers(p);
830     }
831    
832     /*
833     * Nr of active tasks - counted in fixed-point numbers
834     */
835     static unsigned long count_active_tasks(void)
836     {
837     return (nr_running() + nr_uninterruptible()) * FIXED_1;
838     }
839    
840     /*
841     * Hmm.. Changed this, as the GNU make sources (load.c) seems to
842     * imply that avenrun[] is the standard name for this kind of thing.
843     * Nothing else seems to be standardized: the fractional size etc
844     * all seem to differ on different machines.
845     *
846     * Requires xtime_lock to access.
847     */
848     unsigned long avenrun[3];
849    
850     EXPORT_SYMBOL(avenrun);
851    
852     /*
853     * calc_load - given tick count, update the avenrun load estimates.
854     * This is called while holding a write_lock on xtime_lock.
855     */
856     static inline void calc_load(unsigned long ticks)
857     {
858     unsigned long active_tasks; /* fixed-point */
859     static int count = LOAD_FREQ;
860    
861     count -= ticks;
862     if (count < 0) {
863     count += LOAD_FREQ;
864     active_tasks = count_active_tasks();
865     CALC_LOAD(avenrun[0], EXP_1, active_tasks);
866     CALC_LOAD(avenrun[1], EXP_5, active_tasks);
867     CALC_LOAD(avenrun[2], EXP_15, active_tasks);
868     }
869     }
870    
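CALC_LOAD itself lives in include/linux/sched.h, not here; it applies a fixed-point exponential decay once per LOAD_FREQ (5 seconds). A standalone sketch of that decay (an editorial illustration, with constants as in the 2.6-era sched.h: FSHIFT=11, EXP_1=1884 for the 1-minute average):

#include <stdio.h>

#define FSHIFT	11
#define FIXED_1	(1UL << FSHIFT)
#define EXP_1	1884

/* One CALC_LOAD step for the 1-minute average; active_tasks is already
 * scaled by FIXED_1, as in count_active_tasks() above.
 */
static unsigned long calc_load_step(unsigned long load, unsigned long active_tasks)
{
	load *= EXP_1;
	load += active_tasks * (FIXED_1 - EXP_1);
	return load >> FSHIFT;
}

int main(void)
{
	unsigned long load = 0, i;

	/* two runnable tasks, sampled every 5 s for one minute */
	for (i = 0; i < 12; i++)
		load = calc_load_step(load, 2 * FIXED_1);

	printf("1-min load after 60s: %lu.%02lu\n",
	       load >> FSHIFT, (load & (FIXED_1 - 1)) * 100 / FIXED_1);
	return 0;
}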
871     /* jiffies at the most recent update of wall time */
872     unsigned long wall_jiffies = INITIAL_JIFFIES;
873    
874     /*
875     * This read-write spinlock protects us from races in SMP while
876     * playing with xtime and avenrun.
877     */
878     #ifndef ARCH_HAVE_XTIME_LOCK
879     seqlock_t xtime_lock __cacheline_aligned_in_smp = SEQLOCK_UNLOCKED;
880    
881     EXPORT_SYMBOL(xtime_lock);
882     #endif
883    
884     /*
885     * This function runs timers and the timer-tq in bottom half context.
886     */
887     static void run_timer_softirq(struct softirq_action *h)
888     {
889     tvec_base_t *base = &__get_cpu_var(tvec_bases);
890    
891     if (time_after_eq(jiffies, base->timer_jiffies))
892     __run_timers(base);
893     }
894    
895     /*
896     * Called by the local, per-CPU timer interrupt on SMP.
897     */
898     void run_local_timers(void)
899     {
900     raise_softirq(TIMER_SOFTIRQ);
901     }
902    
903     /*
904     * Called by the timer interrupt. xtime_lock must already be taken
905     * by the timer IRQ!
906     */
907     static inline void update_times(void)
908     {
909     unsigned long ticks;
910    
911     ticks = jiffies - wall_jiffies;
912     if (ticks) {
913     wall_jiffies += ticks;
914     update_wall_time(ticks);
915     }
916     calc_load(ticks);
917     }
918    
919     /*
920     * The 64-bit jiffies value is not atomic - you MUST NOT read it
921     * without sampling the sequence number in xtime_lock.
922     * jiffies is defined in the linker script...
923     */
924    
925     void do_timer(struct pt_regs *regs)
926     {
927     jiffies_64++;
928     update_times();
929     }
930    
931     #ifdef __ARCH_WANT_SYS_ALARM
932    
933     /*
934     * For backwards compatibility? This can be done in libc so Alpha
935     * and all newer ports shouldn't need it.
936     */
937     asmlinkage unsigned long sys_alarm(unsigned int seconds)
938     {
939     struct itimerval it_new, it_old;
940     unsigned int oldalarm;
941    
942     it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
943     it_new.it_value.tv_sec = seconds;
944     it_new.it_value.tv_usec = 0;
945     do_setitimer(ITIMER_REAL, &it_new, &it_old);
946     oldalarm = it_old.it_value.tv_sec;
947     /* ehhh.. We can't return 0 if we have an alarm pending.. */
948     /* And we'd better return too much than too little anyway */
949     if ((!oldalarm && it_old.it_value.tv_usec) || it_old.it_value.tv_usec >= 500000)
950     oldalarm++;
951     return oldalarm;
952     }
953    
954     #endif
955    
956     #ifndef __alpha__
957    
958     /*
959     * The Alpha uses getxpid, getxuid, and getxgid instead. Maybe this
960     * should be moved into arch/i386 instead?
961     */
962    
963     /**
964     * sys_getpid - return the thread group id of the current process
965     *
966     * Note, despite the name, this returns the tgid not the pid. The tgid and
967     * the pid are identical unless CLONE_THREAD was specified on clone() in
968     * which case the tgid is the same in all threads of the same group.
969     *
970     * This is SMP safe as current->tgid does not change.
971     */
972     asmlinkage long sys_getpid(void)
973     {
974     return current->tgid;
975     }
976    
977     /*
978     * Accessing ->group_leader->real_parent is not SMP-safe, it could
979     * change from under us. However, rather than getting any lock
980     * we can use an optimistic algorithm: get the parent
981     * pid, and go back and check that the parent is still
982     * the same. If it has changed (which is extremely unlikely
983     * indeed), we just try again..
984     *
985     * NOTE! This depends on the fact that even if we _do_
986     * get an old value of "parent", we can happily dereference
987     * the pointer (it was and remains a dereferenceable kernel pointer
988     * no matter what): we just can't necessarily trust the result
989     * until we know that the parent pointer is valid.
990     *
991     * NOTE2: ->group_leader never changes from under us.
992     */
993     asmlinkage long sys_getppid(void)
994     {
995     int pid;
996     struct task_struct *me = current;
997     struct task_struct *parent;
998    
999     parent = me->group_leader->real_parent;
1000     for (;;) {
1001     pid = parent->tgid;
1002     #ifdef CONFIG_SMP
1003     {
1004     struct task_struct *old = parent;
1005    
1006     /*
1007     * Make sure we read the pid before re-reading the
1008     * parent pointer:
1009     */
1010     smp_rmb();
1011     parent = me->group_leader->real_parent;
1012     if (old != parent)
1013     continue;
1014     }
1015     #endif
1016     break;
1017     }
1018     return pid;
1019     }
1020    
1021     asmlinkage long sys_getuid(void)
1022     {
1023     /* Only we change this so SMP safe */
1024     return current->uid;
1025     }
1026    
1027     asmlinkage long sys_geteuid(void)
1028     {
1029     /* Only we change this so SMP safe */
1030     return current->euid;
1031     }
1032    
1033     asmlinkage long sys_getgid(void)
1034     {
1035     /* Only we change this so SMP safe */
1036     return current->gid;
1037     }
1038    
1039     asmlinkage long sys_getegid(void)
1040     {
1041     /* Only we change this so SMP safe */
1042     return current->egid;
1043     }
1044    
1045     #endif
1046    
1047     static void process_timeout(unsigned long __data)
1048     {
1049     wake_up_process((task_t *)__data);
1050     }
1051    
1052     /**
1053     * schedule_timeout - sleep until timeout
1054     * @timeout: timeout value in jiffies
1055     *
1056     * Make the current task sleep until @timeout jiffies have
1057     * elapsed. The routine will return immediately unless
1058     * the current task state has been set (see set_current_state()).
1059     *
1060     * You can set the task state as follows -
1061     *
1062     * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to
1063     * pass before the routine returns. The routine will return 0
1064     *
1065     * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
1066     * delivered to the current task. In this case the remaining time
1067     * in jiffies will be returned, or 0 if the timer expired in time
1068     *
1069     * The current task state is guaranteed to be TASK_RUNNING when this
1070     * routine returns.
1071     *
1072     * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule
1073     * the CPU away without a bound on the timeout. In this case the return
1074     * value will be %MAX_SCHEDULE_TIMEOUT.
1075     *
1076     * In all cases the return value is guaranteed to be non-negative.
1077     */
1078     fastcall signed long __sched schedule_timeout(signed long timeout)
1079     {
1080     struct timer_list timer;
1081     unsigned long expire;
1082    
1083     switch (timeout)
1084     {
1085     case MAX_SCHEDULE_TIMEOUT:
1086     /*
1087     * These two special cases are useful to be comfortable
1088     * in the caller. Nothing more. We could take
1089     * MAX_SCHEDULE_TIMEOUT from one of the negative value
1090     * but I'd like to return a valid offset (>=0) to allow
1091     * the caller to do everything it wants with the retval.
1092     */
1093     schedule();
1094     goto out;
1095     default:
1096     /*
1097     * Another bit of PARANOID. Note that the retval will be
1098     * 0 since no piece of kernel is supposed to do a check
1099     * for a negative retval of schedule_timeout() (since it
1100     * should never happen anyway). You just have the printk()
1101     * that will tell you if something has gone wrong and where.
1102     */
1103     if (timeout < 0)
1104     {
1105     printk(KERN_ERR "schedule_timeout: wrong timeout "
1106     "value %lx from %p\n", timeout,
1107     __builtin_return_address(0));
1108     current->state = TASK_RUNNING;
1109     goto out;
1110     }
1111     }
1112    
1113     expire = timeout + jiffies;
1114    
1115     init_timer(&timer);
1116     timer.expires = expire;
1117     timer.data = (unsigned long) current;
1118     timer.function = process_timeout;
1119    
1120     add_timer(&timer);
1121     schedule();
1122     del_singleshot_timer_sync(&timer);
1123    
1124     timeout = expire - jiffies;
1125    
1126     out:
1127     return timeout < 0 ? 0 : timeout;
1128     }
1129    
1130     EXPORT_SYMBOL(schedule_timeout);
1131    
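A minimal caller sketch for the contract described above (hypothetical, not part of timer.c): the task state must be set before calling, and the return value is the time left in jiffies if a signal cut the sleep short.

#include <linux/sched.h>

/* Hypothetical caller: sleep for up to one second, interruptible by
 * signals, as the comment above describes.
 */
static signed long my_wait_one_second(void)
{
	signed long remaining;

	set_current_state(TASK_INTERRUPTIBLE);
	remaining = schedule_timeout(HZ);	/* 0 if the full second elapsed,
						 * otherwise jiffies left */
	return remaining;
}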
1132     /* Thread ID - the internal kernel "pid" */
1133     asmlinkage long sys_gettid(void)
1134     {
1135     return current->pid;
1136     }
1137    
1138     static long __sched nanosleep_restart(struct restart_block *restart)
1139     {
1140     unsigned long expire = restart->arg0, now = jiffies;
1141     struct timespec __user *rmtp = (struct timespec __user *) restart->arg1;
1142     long ret;
1143    
1144     /* Did it expire while we handled signals? */
1145     if (!time_after(expire, now))
1146     return 0;
1147    
1148     current->state = TASK_INTERRUPTIBLE;
1149     expire = schedule_timeout(expire - now);
1150    
1151     ret = 0;
1152     if (expire) {
1153     struct timespec t;
1154     jiffies_to_timespec(expire, &t);
1155    
1156     ret = -ERESTART_RESTARTBLOCK;
1157     if (rmtp && copy_to_user(rmtp, &t, sizeof(t)))
1158     ret = -EFAULT;
1159     /* The 'restart' block is already filled in */
1160     }
1161     return ret;
1162     }
1163    
1164     asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp)
1165     {
1166     struct timespec t;
1167     unsigned long expire;
1168     long ret;
1169    
1170     if (copy_from_user(&t, rqtp, sizeof(t)))
1171     return -EFAULT;
1172    
1173     if ((t.tv_nsec >= 1000000000L) || (t.tv_nsec < 0) || (t.tv_sec < 0))
1174     return -EINVAL;
1175    
1176     expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
1177     current->state = TASK_INTERRUPTIBLE;
1178     expire = schedule_timeout(expire);
1179    
1180     ret = 0;
1181     if (expire) {
1182     struct restart_block *restart;
1183     jiffies_to_timespec(expire, &t);
1184     if (rmtp && copy_to_user(rmtp, &t, sizeof(t)))
1185     return -EFAULT;
1186    
1187     restart = &current_thread_info()->restart_block;
1188     restart->fn = nanosleep_restart;
1189     restart->arg0 = jiffies + expire;
1190     restart->arg1 = (unsigned long) rmtp;
1191     ret = -ERESTART_RESTARTBLOCK;
1192     }
1193     return ret;
1194     }
1195    
1196     /*
1197     * sys_sysinfo - fill in sysinfo struct
1198     */
1199     asmlinkage long sys_sysinfo(struct sysinfo __user *info)
1200     {
1201     struct sysinfo val;
1202     unsigned long mem_total, sav_total;
1203     unsigned int mem_unit, bitcount;
1204     unsigned long seq;
1205    
1206     memset((char *)&val, 0, sizeof(struct sysinfo));
1207    
1208     do {
1209     struct timespec tp;
1210     seq = read_seqbegin(&xtime_lock);
1211    
1212     /*
1213     * This is annoying. The below is the same thing
1214     * posix_get_clock_monotonic() does, but it wants to
1215     * take the lock which we want to cover the loads stuff
1216     * too.
1217     */
1218    
1219     getnstimeofday(&tp);
1220     tp.tv_sec += wall_to_monotonic.tv_sec;
1221     tp.tv_nsec += wall_to_monotonic.tv_nsec;
1222     if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
1223     tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
1224     tp.tv_sec++;
1225     }
1226     val.uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
1227    
1228     val.loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
1229     val.loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
1230     val.loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
1231    
1232     val.procs = nr_threads;
1233     } while (read_seqretry(&xtime_lock, seq));
1234    
1235     si_meminfo(&val);
1236     si_swapinfo(&val);
1237    
1238     /*
1239     * If the sum of all the available memory (i.e. ram + swap)
1240     * is less than can be stored in a 32 bit unsigned long then
1241     * we can be binary compatible with 2.2.x kernels. If not,
1242     * well, in that case 2.2.x was broken anyways...
1243     *
1244     * -Erik Andersen <andersee@debian.org>
1245     */
1246    
1247     mem_total = val.totalram + val.totalswap;
1248     if (mem_total < val.totalram || mem_total < val.totalswap)
1249     goto out;
1250     bitcount = 0;
1251     mem_unit = val.mem_unit;
1252     while (mem_unit > 1) {
1253     bitcount++;
1254     mem_unit >>= 1;
1255     sav_total = mem_total;
1256     mem_total <<= 1;
1257     if (mem_total < sav_total)
1258     goto out;
1259     }
1260    
1261     /*
1262     * If mem_total did not overflow, multiply all memory values by
1263     * val.mem_unit and set it to 1. This leaves things compatible
1264     * with 2.2.x, and also retains compatibility with earlier 2.4.x
1265     * kernels...
1266     */
1267    
1268     val.mem_unit = 1;
1269     val.totalram <<= bitcount;
1270     val.freeram <<= bitcount;
1271     val.sharedram <<= bitcount;
1272     val.bufferram <<= bitcount;
1273     val.totalswap <<= bitcount;
1274     val.freeswap <<= bitcount;
1275     val.totalhigh <<= bitcount;
1276     val.freehigh <<= bitcount;
1277    
1278     out:
1279     if (copy_to_user(info, &val, sizeof(struct sysinfo)))
1280     return -EFAULT;
1281    
1282     return 0;
1283     }
1284    
1285     static void __devinit init_timers_cpu(int cpu)
1286     {
1287     int j;
1288     tvec_base_t *base;
1289    
1290     base = &per_cpu(tvec_bases, cpu);
1291     spin_lock_init(&base->lock);
1292     for (j = 0; j < TVN_SIZE; j++) {
1293     INIT_LIST_HEAD(base->tv5.vec + j);
1294     INIT_LIST_HEAD(base->tv4.vec + j);
1295     INIT_LIST_HEAD(base->tv3.vec + j);
1296     INIT_LIST_HEAD(base->tv2.vec + j);
1297     }
1298     for (j = 0; j < TVR_SIZE; j++)
1299     INIT_LIST_HEAD(base->tv1.vec + j);
1300    
1301     base->timer_jiffies = jiffies;
1302     }
1303    
1304     #ifdef CONFIG_HOTPLUG_CPU
1305     static int migrate_timer_list(tvec_base_t *new_base, struct list_head *head)
1306     {
1307     struct timer_list *timer;
1308    
1309     while (!list_empty(head)) {
1310     timer = list_entry(head->next, struct timer_list, entry);
1311     /* We're locking backwards from __mod_timer order here,
1312     beware deadlock. */
1313     if (!spin_trylock(&timer->lock))
1314     return 0;
1315     list_del(&timer->entry);
1316     internal_add_timer(new_base, timer);
1317     timer->base = new_base;
1318     spin_unlock(&timer->lock);
1319     }
1320     return 1;
1321     }
1322    
1323     static void __devinit migrate_timers(int cpu)
1324     {
1325     tvec_base_t *old_base;
1326     tvec_base_t *new_base;
1327     int i;
1328    
1329     BUG_ON(cpu_online(cpu));
1330     old_base = &per_cpu(tvec_bases, cpu);
1331     new_base = &get_cpu_var(tvec_bases);
1332    
1333     local_irq_disable();
1334     again:
1335     /* Prevent deadlocks via ordering by old_base < new_base. */
1336     if (old_base < new_base) {
1337     spin_lock(&new_base->lock);
1338     spin_lock(&old_base->lock);
1339     } else {
1340     spin_lock(&old_base->lock);
1341     spin_lock(&new_base->lock);
1342     }
1343    
1344     if (old_base->running_timer)
1345     BUG();
1346     for (i = 0; i < TVR_SIZE; i++)
1347     if (!migrate_timer_list(new_base, old_base->tv1.vec + i))
1348     goto unlock_again;
1349     for (i = 0; i < TVN_SIZE; i++)
1350     if (!migrate_timer_list(new_base, old_base->tv2.vec + i)
1351     || !migrate_timer_list(new_base, old_base->tv3.vec + i)
1352     || !migrate_timer_list(new_base, old_base->tv4.vec + i)
1353     || !migrate_timer_list(new_base, old_base->tv5.vec + i))
1354     goto unlock_again;
1355     spin_unlock(&old_base->lock);
1356     spin_unlock(&new_base->lock);
1357     local_irq_enable();
1358     put_cpu_var(tvec_bases);
1359     return;
1360    
1361     unlock_again:
1362     /* Avoid deadlock with __mod_timer, by backing off. */
1363     spin_unlock(&old_base->lock);
1364     spin_unlock(&new_base->lock);
1365     cpu_relax();
1366     goto again;
1367     }
1368     #endif /* CONFIG_HOTPLUG_CPU */
1369    
1370     static int __devinit timer_cpu_notify(struct notifier_block *self,
1371     unsigned long action, void *hcpu)
1372     {
1373     long cpu = (long)hcpu;
1374     switch(action) {
1375     case CPU_UP_PREPARE:
1376     init_timers_cpu(cpu);
1377     break;
1378     #ifdef CONFIG_HOTPLUG_CPU
1379     case CPU_DEAD:
1380     migrate_timers(cpu);
1381     break;
1382     #endif
1383     default:
1384     break;
1385     }
1386     return NOTIFY_OK;
1387     }
1388    
1389     static struct notifier_block __devinitdata timers_nb = {
1390     .notifier_call = timer_cpu_notify,
1391     };
1392    
1393    
1394     void __init init_timers(void)
1395     {
1396     timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
1397     (void *)(long)smp_processor_id());
1398     register_cpu_notifier(&timers_nb);
1399     open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);
1400     }
1401    
1402     #ifdef CONFIG_TIME_INTERPOLATION
1403    
1404     struct time_interpolator *time_interpolator;
1405     static struct time_interpolator *time_interpolator_list;
1406     static DEFINE_SPINLOCK(time_interpolator_lock);
1407    
1408     static inline u64 time_interpolator_get_cycles(unsigned int src)
1409     {
1410     unsigned long (*x)(void);
1411    
1412     switch (src)
1413     {
1414     case TIME_SOURCE_FUNCTION:
1415     x = time_interpolator->addr;
1416     return x();
1417    
1418     case TIME_SOURCE_MMIO64 :
1419     return readq((void __iomem *) time_interpolator->addr);
1420    
1421     case TIME_SOURCE_MMIO32 :
1422     return readl((void __iomem *) time_interpolator->addr);
1423    
1424     default: return get_cycles();
1425     }
1426     }
1427    
1428     static inline u64 time_interpolator_get_counter(void)
1429     {
1430     unsigned int src = time_interpolator->source;
1431    
1432     if (time_interpolator->jitter)
1433     {
1434     u64 lcycle;
1435     u64 now;
1436    
1437     do {
1438     lcycle = time_interpolator->last_cycle;
1439     now = time_interpolator_get_cycles(src);
1440     if (lcycle && time_after(lcycle, now))
1441     return lcycle;
1442     /* Keep track of the last timer value returned. The use of cmpxchg here
1443     * will cause contention in an SMP environment.
1444     */
1445     } while (unlikely(cmpxchg(&time_interpolator->last_cycle, lcycle, now) != lcycle));
1446     return now;
1447     }
1448     else
1449     return time_interpolator_get_cycles(src);
1450     }
1451    
1452     void time_interpolator_reset(void)
1453     {
1454     time_interpolator->offset = 0;
1455     time_interpolator->last_counter = time_interpolator_get_counter();
1456     }
1457    
1458     #define GET_TI_NSECS(count,i) (((((count) - i->last_counter) & (i)->mask) * (i)->nsec_per_cyc) >> (i)->shift)
1459    
1460     unsigned long time_interpolator_get_offset(void)
1461     {
1462     /* If we do not have a time interpolator set up then just return zero */
1463     if (!time_interpolator)
1464     return 0;
1465    
1466     return time_interpolator->offset +
1467     GET_TI_NSECS(time_interpolator_get_counter(), time_interpolator);
1468     }
1469    
1470     #define INTERPOLATOR_ADJUST 65536
1471     #define INTERPOLATOR_MAX_SKIP 10*INTERPOLATOR_ADJUST
1472    
1473     static void time_interpolator_update(long delta_nsec)
1474     {
1475     u64 counter;
1476     unsigned long offset;
1477    
1478     /* If there is no time interpolator set up then do nothing */
1479     if (!time_interpolator)
1480     return;
1481    
1482     /* The interpolator compensates for late ticks by accumulating
1483     * the late time in time_interpolator->offset. A tick earlier than
1484     * expected will lead to a reset of the offset and a corresponding
1485     * jump of the clock forward. Again this only works if the
1486     * interpolator clock is running slightly slower than the regular clock
1487     * and the tuning logic insures that.
1488     */
1489    
1490     counter = time_interpolator_get_counter();
1491     offset = time_interpolator->offset + GET_TI_NSECS(counter, time_interpolator);
1492    
1493     if (delta_nsec < 0 || (unsigned long) delta_nsec < offset)
1494     time_interpolator->offset = offset - delta_nsec;
1495     else {
1496     time_interpolator->skips++;
1497     time_interpolator->ns_skipped += delta_nsec - offset;
1498     time_interpolator->offset = 0;
1499     }
1500     time_interpolator->last_counter = counter;
1501    
1502     /* Tuning logic for time interpolator invoked every minute or so.
1503     * Decrease interpolator clock speed if no skips occurred and an offset is carried.
1504     * Increase interpolator clock speed if we skip too much time.
1505     */
1506     if (jiffies % INTERPOLATOR_ADJUST == 0)
1507     {
1508     if (time_interpolator->skips == 0 && time_interpolator->offset > TICK_NSEC)
1509     time_interpolator->nsec_per_cyc--;
1510     if (time_interpolator->ns_skipped > INTERPOLATOR_MAX_SKIP && time_interpolator->offset == 0)
1511     time_interpolator->nsec_per_cyc++;
1512     time_interpolator->skips = 0;
1513     time_interpolator->ns_skipped = 0;
1514     }
1515     }
1516    
1517     static inline int
1518     is_better_time_interpolator(struct time_interpolator *new)
1519     {
1520     if (!time_interpolator)
1521     return 1;
1522     return new->frequency > 2*time_interpolator->frequency ||
1523     (unsigned long)new->drift < (unsigned long)time_interpolator->drift;
1524     }
1525    
1526     void
1527     register_time_interpolator(struct time_interpolator *ti)
1528     {
1529     unsigned long flags;
1530    
1531     /* Sanity check */
1532     if (ti->frequency == 0 || ti->mask == 0)
1533     BUG();
1534    
1535     ti->nsec_per_cyc = ((u64)NSEC_PER_SEC << ti->shift) / ti->frequency;
1536     spin_lock(&time_interpolator_lock);
1537     write_seqlock_irqsave(&xtime_lock, flags);
1538     if (is_better_time_interpolator(ti)) {
1539     time_interpolator = ti;
1540     time_interpolator_reset();
1541     }
1542     write_sequnlock_irqrestore(&xtime_lock, flags);
1543    
1544     ti->next = time_interpolator_list;
1545     time_interpolator_list = ti;
1546     spin_unlock(&time_interpolator_lock);
1547     }
1548    
1549     void
1550     unregister_time_interpolator(struct time_interpolator *ti)
1551     {
1552     struct time_interpolator *curr, **prev;
1553     unsigned long flags;
1554    
1555     spin_lock(&time_interpolator_lock);
1556     prev = &time_interpolator_list;
1557     for (curr = *prev; curr; curr = curr->next) {
1558     if (curr == ti) {
1559     *prev = curr->next;
1560     break;
1561     }
1562     prev = &curr->next;
1563     }
1564    
1565     write_seqlock_irqsave(&xtime_lock, flags);
1566     if (ti == time_interpolator) {
1567     /* we lost the best time-interpolator: */
1568     time_interpolator = NULL;
1569     /* find the next-best interpolator */
1570     for (curr = time_interpolator_list; curr; curr = curr->next)
1571     if (is_better_time_interpolator(curr))
1572     time_interpolator = curr;
1573     time_interpolator_reset();
1574     }
1575     write_sequnlock_irqrestore(&xtime_lock, flags);
1576     spin_unlock(&time_interpolator_lock);
1577     }
1578     #endif /* CONFIG_TIME_INTERPOLATION */
1579    
1580     /**
1581     * msleep - sleep safely even with waitqueue interruptions
1582     * @msecs: Time in milliseconds to sleep for
1583     */
1584     void msleep(unsigned int msecs)
1585     {
1586     unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1587    
1588     while (timeout) {
1589     set_current_state(TASK_UNINTERRUPTIBLE);
1590     timeout = schedule_timeout(timeout);
1591     }
1592     }
1593    
1594     EXPORT_SYMBOL(msleep);
1595    
1596     /**
1597     * msleep_interruptible - sleep waiting for waitqueue interruptions
1598     * @msecs: Time in milliseconds to sleep for
1599     */
1600     unsigned long msleep_interruptible(unsigned int msecs)
1601     {
1602     unsigned long timeout = msecs_to_jiffies(msecs) + 1;
1603    
1604     while (timeout && !signal_pending(current)) {
1605     set_current_state(TASK_INTERRUPTIBLE);
1606     timeout = schedule_timeout(timeout);
1607     }
1608     return jiffies_to_msecs(timeout);
1609     }
1610    
1611     EXPORT_SYMBOL(msleep_interruptible);
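A hypothetical usage note (not part of timer.c): msleep() always sleeps for at least the requested time, while msleep_interruptible() may return early and reports how many milliseconds were left.

#include <linux/delay.h>
#include <linux/errno.h>

/* Hypothetical helper built on msleep_interruptible(). */
static int my_pause(unsigned int ms)
{
	unsigned long left = msleep_interruptible(ms);

	if (left)		/* a pending signal cut the sleep short */
		return -EINTR;	/* 'left' milliseconds were not slept */
	return 0;
}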