Annotation of /trunk/kernel26-alx/patches-2.6.26-r1/0101-2.6.26.2-all-fixes.patch
Parent Directory | Revision Log
Revision 681 -
(hide annotations)
(download)
Wed Sep 17 19:42:13 2008 UTC (16 years ago) by niro
File size: 89436 byte(s)
Wed Sep 17 19:42:13 2008 UTC (16 years ago) by niro
File size: 89436 byte(s)
-2.6.26-alx-r1
1 | niro | 681 | diff --git a/Documentation/ftrace.txt b/Documentation/ftrace.txt |
2 | deleted file mode 100644 | ||
3 | index 13e4bf0..0000000 | ||
4 | --- a/Documentation/ftrace.txt | ||
5 | +++ /dev/null | ||
6 | @@ -1,1353 +0,0 @@ | ||
7 | - ftrace - Function Tracer | ||
8 | - ======================== | ||
9 | - | ||
10 | -Copyright 2008 Red Hat Inc. | ||
11 | -Author: Steven Rostedt <srostedt@redhat.com> | ||
12 | - | ||
13 | - | ||
14 | -Introduction | ||
15 | ------------- | ||
16 | - | ||
17 | -Ftrace is an internal tracer designed to help out developers and | ||
18 | -designers of systems to find what is going on inside the kernel. | ||
19 | -It can be used for debugging or analyzing latencies and performance | ||
20 | -issues that take place outside of user-space. | ||
21 | - | ||
22 | -Although ftrace is the function tracer, it also includes an | ||
23 | -infrastructure that allows for other types of tracing. Some of the | ||
24 | -tracers that are currently in ftrace include a tracer to trace | ||
25 | -context switches, the time it takes for a high priority task to | ||
26 | -run after it was woken up, the time interrupts are disabled, and | ||
27 | -more. | ||
28 | - | ||
29 | - | ||
30 | -The File System | ||
31 | ---------------- | ||
32 | - | ||
33 | -Ftrace uses the debugfs file system to hold the control files as well | ||
34 | -as the files to display output. | ||
35 | - | ||
36 | -To mount the debugfs system: | ||
37 | - | ||
38 | - # mkdir /debug | ||
39 | - # mount -t debugfs nodev /debug | ||
40 | - | ||
41 | - | ||
42 | -That's it! (assuming that you have ftrace configured into your kernel) | ||
43 | - | ||
44 | -After mounting the debugfs, you can see a directory called | ||
45 | -"tracing". This directory contains the control and output files | ||
46 | -of ftrace. Here is a list of some of the key files: | ||
47 | - | ||
48 | - | ||
49 | - Note: all time values are in microseconds. | ||
50 | - | ||
51 | - current_tracer : This is used to set or display the current tracer | ||
52 | - that is configured. | ||
53 | - | ||
54 | - available_tracers : This holds the different types of tracers that | ||
55 | - have been compiled into the kernel. The tracers | ||
56 | - listed here can be configured by echoing in their | ||
57 | - name into current_tracer. | ||
58 | - | ||
59 | - tracing_enabled : This sets or displays whether the current_tracer | ||
60 | - is activated and tracing or not. Echo 0 into this | ||
61 | - file to disable the tracer or 1 (or non-zero) to | ||
62 | - enable it. | ||
63 | - | ||
64 | - trace : This file holds the output of the trace in a human readable | ||
65 | - format. | ||
66 | - | ||
67 | - latency_trace : This file shows the same trace but the information | ||
68 | - is organized more to display possible latencies | ||
69 | - in the system. | ||
70 | - | ||
71 | - trace_pipe : The output is the same as the "trace" file but this | ||
72 | - file is meant to be streamed with live tracing. | ||
73 | - Reads from this file will block until new data | ||
74 | - is retrieved. Unlike the "trace" and "latency_trace" | ||
75 | - files, this file is a consumer. This means reading | ||
76 | - from this file causes sequential reads to display | ||
77 | - more current data. Once data is read from this | ||
78 | - file, it is consumed, and will not be read | ||
79 | - again with a sequential read. The "trace" and | ||
80 | - "latency_trace" files are static, and if the | ||
81 | - tracer isn't adding more data, they will display | ||
82 | - the same information every time they are read. | ||
83 | - | ||
84 | - iter_ctrl : This file lets the user control the amount of data | ||
85 | - that is displayed in one of the above output | ||
86 | - files. | ||
87 | - | ||
88 | - trace_max_latency : Some of the tracers record the max latency. | ||
89 | - For example, the time interrupts are disabled. | ||
90 | - This time is saved in this file. The max trace | ||
91 | - will also be stored, and displayed by either | ||
92 | - "trace" or "latency_trace". A new max trace will | ||
93 | - only be recorded if the latency is greater than | ||
94 | - the value in this file. (in microseconds) | ||
95 | - | ||
96 | - trace_entries : This sets or displays the number of trace | ||
97 | - entries each CPU buffer can hold. The tracer buffers | ||
98 | - are the same size for each CPU, so care must be | ||
99 | - taken when modifying the trace_entries. The number | ||
100 | - of actual entries will be the number given | ||
101 | - times the number of possible CPUS. The buffers | ||
102 | - are saved as individual pages, and the actual entries | ||
103 | - will always be rounded up to entries per page. | ||
104 | - | ||
105 | - This can only be updated when the current_tracer | ||
106 | - is set to "none". | ||
107 | - | ||
108 | - NOTE: It is planned on changing the allocated buffers | ||
109 | - from being the number of possible CPUS to | ||
110 | - the number of online CPUS. | ||
111 | - | ||
112 | - tracing_cpumask : This is a mask that lets the user only trace | ||
113 | - on specified CPUS. The format is a hex string | ||
114 | - representing the CPUS. | ||
115 | - | ||
116 | - set_ftrace_filter : When dynamic ftrace is configured in, the | ||
117 | - code is dynamically modified to disable calling | ||
118 | - of the function profiler (mcount). This lets | ||
119 | - tracing be configured in with practically no overhead | ||
120 | - in performance. This also has a side effect of | ||
121 | - enabling or disabling specific functions to be | ||
122 | - traced. Echoing in names of functions into this | ||
123 | - file will limit the trace to only those files. | ||
124 | - | ||
125 | - set_ftrace_notrace: This has the opposite effect that | ||
126 | - set_ftrace_filter has. Any function that is added | ||
127 | - here will not be traced. If a function exists | ||
128 | - in both set_ftrace_filter and set_ftrace_notrace | ||
129 | - the function will _not_ be traced. | ||
130 | - | ||
131 | - available_filter_functions : When a function is encountered the first | ||
132 | - time by the dynamic tracer, it is recorded and | ||
133 | - later the call is converted into a nop. This file | ||
134 | - lists the functions that have been recorded | ||
135 | - by the dynamic tracer and these functions can | ||
136 | - be used to set the ftrace filter by the above | ||
137 | - "set_ftrace_filter" file. | ||
138 | - | ||
139 | - | ||
140 | -The Tracers | ||
141 | ------------ | ||
142 | - | ||
143 | -Here are the list of current tracers that can be configured. | ||
144 | - | ||
145 | - ftrace - function tracer that uses mcount to trace all functions. | ||
146 | - It is possible to filter out which functions that are | ||
147 | - traced when dynamic ftrace is configured in. | ||
148 | - | ||
149 | - sched_switch - traces the context switches between tasks. | ||
150 | - | ||
151 | - irqsoff - traces the areas that disable interrupts and saves off | ||
152 | - the trace with the longest max latency. | ||
153 | - See tracing_max_latency. When a new max is recorded, | ||
154 | - it replaces the old trace. It is best to view this | ||
155 | - trace with the latency_trace file. | ||
156 | - | ||
157 | - preemptoff - Similar to irqsoff but traces and records the time | ||
158 | - preemption is disabled. | ||
159 | - | ||
160 | - preemptirqsoff - Similar to irqsoff and preemptoff, but traces and | ||
161 | - records the largest time irqs and/or preemption is | ||
162 | - disabled. | ||
163 | - | ||
164 | - wakeup - Traces and records the max latency that it takes for | ||
165 | - the highest priority task to get scheduled after | ||
166 | - it has been woken up. | ||
167 | - | ||
168 | - none - This is not a tracer. To remove all tracers from tracing | ||
169 | - simply echo "none" into current_tracer. | ||
170 | - | ||
171 | - | ||
172 | -Examples of using the tracer | ||
173 | ----------------------------- | ||
174 | - | ||
175 | -Here are typical examples of using the tracers with only controlling | ||
176 | -them with the debugfs interface (without using any user-land utilities). | ||
177 | - | ||
178 | -Output format: | ||
179 | --------------- | ||
180 | - | ||
181 | -Here's an example of the output format of the file "trace" | ||
182 | - | ||
183 | - -------- | ||
184 | -# tracer: ftrace | ||
185 | -# | ||
186 | -# TASK-PID CPU# TIMESTAMP FUNCTION | ||
187 | -# | | | | | | ||
188 | - bash-4251 [01] 10152.583854: path_put <-path_walk | ||
189 | - bash-4251 [01] 10152.583855: dput <-path_put | ||
190 | - bash-4251 [01] 10152.583855: _atomic_dec_and_lock <-dput | ||
191 | - -------- | ||
192 | - | ||
193 | -A header is printed with the trace that is represented. In this case | ||
194 | -the tracer is "ftrace". Then a header showing the format. Task name | ||
195 | -"bash", the task PID "4251", the CPU that it was running on | ||
196 | -"01", the timestamp in <secs>.<usecs> format, the function name that was | ||
197 | -traced "path_put" and the parent function that called this function | ||
198 | -"path_walk". | ||
199 | - | ||
200 | -The sched_switch tracer also includes tracing of task wake ups and | ||
201 | -context switches. | ||
202 | - | ||
203 | - ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S | ||
204 | - ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S | ||
205 | - ksoftirqd/1-7 [01] 1453.070013: 7:115:R ==> 10:115:R | ||
206 | - events/1-10 [01] 1453.070013: 10:115:S ==> 2916:115:R | ||
207 | - kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R | ||
208 | - ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R | ||
209 | - | ||
210 | -Wake ups are represented by a "+" and the context switches show | ||
211 | -"==>". The format is: | ||
212 | - | ||
213 | - Context switches: | ||
214 | - | ||
215 | - Previous task Next Task | ||
216 | - | ||
217 | - <pid>:<prio>:<state> ==> <pid>:<prio>:<state> | ||
218 | - | ||
219 | - Wake ups: | ||
220 | - | ||
221 | - Current task Task waking up | ||
222 | - | ||
223 | - <pid>:<prio>:<state> + <pid>:<prio>:<state> | ||
224 | - | ||
225 | -The prio is the internal kernel priority, which is inverse to the | ||
226 | -priority that is usually displayed by user-space tools. Zero represents | ||
227 | -the highest priority (99). Prio 100 starts the "nice" priorities with | ||
228 | -100 being equal to nice -20 and 139 being nice 19. The prio "140" is | ||
229 | -reserved for the idle task which is the lowest priority thread (pid 0). | ||
230 | - | ||
231 | - | ||
232 | -Latency trace format | ||
233 | --------------------- | ||
234 | - | ||
235 | -For traces that display latency times, the latency_trace file gives | ||
236 | -a bit more information to see why a latency happened. Here's a typical | ||
237 | -trace. | ||
238 | - | ||
239 | -# tracer: irqsoff | ||
240 | -# | ||
241 | -irqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
242 | --------------------------------------------------------------------- | ||
243 | - latency: 97 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
244 | - ----------------- | ||
245 | - | task: swapper-0 (uid:0 nice:0 policy:0 rt_prio:0) | ||
246 | - ----------------- | ||
247 | - => started at: apic_timer_interrupt | ||
248 | - => ended at: do_softirq | ||
249 | - | ||
250 | -# _------=> CPU# | ||
251 | -# / _-----=> irqs-off | ||
252 | -# | / _----=> need-resched | ||
253 | -# || / _---=> hardirq/softirq | ||
254 | -# ||| / _--=> preempt-depth | ||
255 | -# |||| / | ||
256 | -# ||||| delay | ||
257 | -# cmd pid ||||| time | caller | ||
258 | -# \ / ||||| \ | / | ||
259 | - <idle>-0 0d..1 0us+: trace_hardirqs_off_thunk (apic_timer_interrupt) | ||
260 | - <idle>-0 0d.s. 97us : __do_softirq (do_softirq) | ||
261 | - <idle>-0 0d.s1 98us : trace_hardirqs_on (do_softirq) | ||
262 | - | ||
263 | - | ||
264 | -vim:ft=help | ||
265 | - | ||
266 | - | ||
267 | -This shows that the current tracer is "irqsoff" tracing the time | ||
268 | -interrupts are disabled. It gives the trace version and the kernel | ||
269 | -this was executed on (2.6.26-rc8). Then it displays the max latency | ||
270 | -in microsecs (97 us). The number of trace entries displayed | ||
271 | -by the total number recorded (both are three: #3/3). The type of | ||
272 | -preemption that was used (PREEMPT). VP, KP, SP, and HP are always zero | ||
273 | -and reserved for later use. #P is the number of online CPUS (#P:2). | ||
274 | - | ||
275 | -The task is the process that was running when the latency happened. | ||
276 | -(swapper pid: 0). | ||
277 | - | ||
278 | -The start and stop that caused the latencies: | ||
279 | - | ||
280 | - apic_timer_interrupt is where the interrupts were disabled. | ||
281 | - do_softirq is where they were enabled again. | ||
282 | - | ||
283 | -The next lines after the header are the trace itself. The header | ||
284 | -explains which is which. | ||
285 | - | ||
286 | - cmd: The name of the process in the trace. | ||
287 | - | ||
288 | - pid: The PID of that process. | ||
289 | - | ||
290 | - CPU#: The CPU that the process was running on. | ||
291 | - | ||
292 | - irqs-off: 'd' interrupts are disabled. '.' otherwise. | ||
293 | - | ||
294 | - need-resched: 'N' task need_resched is set, '.' otherwise. | ||
295 | - | ||
296 | - hardirq/softirq: | ||
297 | - 'H' - hard irq happened inside a softirq. | ||
298 | - 'h' - hard irq is running | ||
299 | - 's' - soft irq is running | ||
300 | - '.' - normal context. | ||
301 | - | ||
302 | - preempt-depth: The level of preempt_disabled | ||
303 | - | ||
304 | -The above is mostly meaningful for kernel developers. | ||
305 | - | ||
306 | - time: This differs from the trace output where as the trace output | ||
307 | - contained an absolute timestamp. This timestamp is relative | ||
308 | - to the start of the first entry in the trace. | ||
309 | - | ||
310 | - delay: This is just to help catch your eye a bit better. And | ||
311 | - needs to be fixed to be only relative to the same CPU. | ||
312 | - The marks is determined by the difference between this | ||
313 | - current trace and the next trace. | ||
314 | - '!' - greater than preempt_mark_thresh (default 100) | ||
315 | - '+' - greater than 1 microsecond | ||
316 | - ' ' - less than or equal to 1 microsecond. | ||
317 | - | ||
318 | - The rest is the same as the 'trace' file. | ||
319 | - | ||
320 | - | ||
321 | -iter_ctrl | ||
322 | ---------- | ||
323 | - | ||
324 | -The iter_ctrl file is used to control what gets printed in the trace | ||
325 | -output. To see what is available, simply cat the file: | ||
326 | - | ||
327 | - cat /debug/tracing/iter_ctrl | ||
328 | - print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ | ||
329 | - noblock nostacktrace nosched-tree | ||
330 | - | ||
331 | -To disable one of the options, echo in the option appended with "no". | ||
332 | - | ||
333 | - echo noprint-parent > /debug/tracing/iter_ctrl | ||
334 | - | ||
335 | -To enable an option, leave off the "no". | ||
336 | - | ||
337 | - echo sym-offset > /debug/tracing/iter_ctrl | ||
338 | - | ||
339 | -Here are the available options: | ||
340 | - | ||
341 | - print-parent - On function traces, display the calling function | ||
342 | - as well as the function being traced. | ||
343 | - | ||
344 | - print-parent: | ||
345 | - bash-4000 [01] 1477.606694: simple_strtoul <-strict_strtoul | ||
346 | - | ||
347 | - noprint-parent: | ||
348 | - bash-4000 [01] 1477.606694: simple_strtoul | ||
349 | - | ||
350 | - | ||
351 | - sym-offset - Display not only the function name, but also the offset | ||
352 | - in the function. For example, instead of seeing just | ||
353 | - "ktime_get" you will see "ktime_get+0xb/0x20" | ||
354 | - | ||
355 | - sym-offset: | ||
356 | - bash-4000 [01] 1477.606694: simple_strtoul+0x6/0xa0 | ||
357 | - | ||
358 | - sym-addr - this will also display the function address as well as | ||
359 | - the function name. | ||
360 | - | ||
361 | - sym-addr: | ||
362 | - bash-4000 [01] 1477.606694: simple_strtoul <c0339346> | ||
363 | - | ||
364 | - verbose - This deals with the latency_trace file. | ||
365 | - | ||
366 | - bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \ | ||
367 | - (+0.000ms): simple_strtoul (strict_strtoul) | ||
368 | - | ||
369 | - raw - This will display raw numbers. This option is best for use with | ||
370 | - user applications that can translate the raw numbers better than | ||
371 | - having it done in the kernel. | ||
372 | - | ||
373 | - hex - similar to raw, but the numbers will be in a hexadecimal format. | ||
374 | - | ||
375 | - bin - This will print out the formats in raw binary. | ||
376 | - | ||
377 | - block - TBD (needs update) | ||
378 | - | ||
379 | - stacktrace - This is one of the options that changes the trace itself. | ||
380 | - When a trace is recorded, so is the stack of functions. | ||
381 | - This allows for back traces of trace sites. | ||
382 | - | ||
383 | - sched-tree - TBD (any users??) | ||
384 | - | ||
385 | - | ||
386 | -sched_switch | ||
387 | ------------- | ||
388 | - | ||
389 | -This tracer simply records schedule switches. Here's an example | ||
390 | -on how to implement it. | ||
391 | - | ||
392 | - # echo sched_switch > /debug/tracing/current_tracer | ||
393 | - # echo 1 > /debug/tracing/tracing_enabled | ||
394 | - # sleep 1 | ||
395 | - # echo 0 > /debug/tracing/tracing_enabled | ||
396 | - # cat /debug/tracing/trace | ||
397 | - | ||
398 | -# tracer: sched_switch | ||
399 | -# | ||
400 | -# TASK-PID CPU# TIMESTAMP FUNCTION | ||
401 | -# | | | | | | ||
402 | - bash-3997 [01] 240.132281: 3997:120:R + 4055:120:R | ||
403 | - bash-3997 [01] 240.132284: 3997:120:R ==> 4055:120:R | ||
404 | - sleep-4055 [01] 240.132371: 4055:120:S ==> 3997:120:R | ||
405 | - bash-3997 [01] 240.132454: 3997:120:R + 4055:120:S | ||
406 | - bash-3997 [01] 240.132457: 3997:120:R ==> 4055:120:R | ||
407 | - sleep-4055 [01] 240.132460: 4055:120:D ==> 3997:120:R | ||
408 | - bash-3997 [01] 240.132463: 3997:120:R + 4055:120:D | ||
409 | - bash-3997 [01] 240.132465: 3997:120:R ==> 4055:120:R | ||
410 | - <idle>-0 [00] 240.132589: 0:140:R + 4:115:S | ||
411 | - <idle>-0 [00] 240.132591: 0:140:R ==> 4:115:R | ||
412 | - ksoftirqd/0-4 [00] 240.132595: 4:115:S ==> 0:140:R | ||
413 | - <idle>-0 [00] 240.132598: 0:140:R + 4:115:S | ||
414 | - <idle>-0 [00] 240.132599: 0:140:R ==> 4:115:R | ||
415 | - ksoftirqd/0-4 [00] 240.132603: 4:115:S ==> 0:140:R | ||
416 | - sleep-4055 [01] 240.133058: 4055:120:S ==> 3997:120:R | ||
417 | - [...] | ||
418 | - | ||
419 | - | ||
420 | -As we have discussed previously about this format, the header shows | ||
421 | -the name of the trace and points to the options. The "FUNCTION" | ||
422 | -is a misnomer since here it represents the wake ups and context | ||
423 | -switches. | ||
424 | - | ||
425 | -The sched_switch only lists the wake ups (represented with '+') | ||
426 | -and context switches ('==>') with the previous task or current | ||
427 | -first followed by the next task or task waking up. The format for both | ||
428 | -of these is PID:KERNEL-PRIO:TASK-STATE. Remember that the KERNEL-PRIO | ||
429 | -is the inverse of the actual priority with zero (0) being the highest | ||
430 | -priority and the nice values starting at 100 (nice -20). Below is | ||
431 | -a quick chart to map the kernel priority to user land priorities. | ||
432 | - | ||
433 | - Kernel priority: 0 to 99 ==> user RT priority 99 to 0 | ||
434 | - Kernel priority: 100 to 139 ==> user nice -20 to 19 | ||
435 | - Kernel priority: 140 ==> idle task priority | ||
436 | - | ||
437 | -The task states are: | ||
438 | - | ||
439 | - R - running : wants to run, may not actually be running | ||
440 | - S - sleep : process is waiting to be woken up (handles signals) | ||
441 | - D - deep sleep : process must be woken up (ignores signals) | ||
442 | - T - stopped : process suspended | ||
443 | - t - traced : process is being traced (with something like gdb) | ||
444 | - Z - zombie : process waiting to be cleaned up | ||
445 | - X - unknown | ||
446 | - | ||
447 | - | ||
448 | -ftrace_enabled | ||
449 | --------------- | ||
450 | - | ||
451 | -The following tracers give different output depending on whether | ||
452 | -or not the sysctl ftrace_enabled is set. To set ftrace_enabled, | ||
453 | -one can either use the sysctl function or set it via the proc | ||
454 | -file system interface. | ||
455 | - | ||
456 | - sysctl kernel.ftrace_enabled=1 | ||
457 | - | ||
458 | - or | ||
459 | - | ||
460 | - echo 1 > /proc/sys/kernel/ftrace_enabled | ||
461 | - | ||
462 | -To disable ftrace_enabled simply replace the '1' with '0' in | ||
463 | -the above commands. | ||
464 | - | ||
465 | -When ftrace_enabled is set the tracers will also record the functions | ||
466 | -that are within the trace. The descriptions of the tracers | ||
467 | -will also show an example with ftrace enabled. | ||
468 | - | ||
469 | - | ||
470 | -irqsoff | ||
471 | -------- | ||
472 | - | ||
473 | -When interrupts are disabled, the CPU can not react to any other | ||
474 | -external event (besides NMIs and SMIs). This prevents the timer | ||
475 | -interrupt from triggering or the mouse interrupt from letting the | ||
476 | -kernel know of a new mouse event. The result is a latency with the | ||
477 | -reaction time. | ||
478 | - | ||
479 | -The irqsoff tracer tracks the time interrupts are disabled and when | ||
480 | -they are re-enabled. When a new maximum latency is hit, it saves off | ||
481 | -the trace so that it may be retrieved at a later time. Every time a | ||
482 | -new maximum is reached, the old saved trace is discarded and the new | ||
483 | -trace is saved. | ||
484 | - | ||
485 | -To reset the maximum, echo 0 into tracing_max_latency. Here's an | ||
486 | -example: | ||
487 | - | ||
488 | - # echo irqsoff > /debug/tracing/current_tracer | ||
489 | - # echo 0 > /debug/tracing/tracing_max_latency | ||
490 | - # echo 1 > /debug/tracing/tracing_enabled | ||
491 | - # ls -ltr | ||
492 | - [...] | ||
493 | - # echo 0 > /debug/tracing/tracing_enabled | ||
494 | - # cat /debug/tracing/latency_trace | ||
495 | -# tracer: irqsoff | ||
496 | -# | ||
497 | -irqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
498 | --------------------------------------------------------------------- | ||
499 | - latency: 6 us, #3/3, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
500 | - ----------------- | ||
501 | - | task: bash-4269 (uid:0 nice:0 policy:0 rt_prio:0) | ||
502 | - ----------------- | ||
503 | - => started at: copy_page_range | ||
504 | - => ended at: copy_page_range | ||
505 | - | ||
506 | -# _------=> CPU# | ||
507 | -# / _-----=> irqs-off | ||
508 | -# | / _----=> need-resched | ||
509 | -# || / _---=> hardirq/softirq | ||
510 | -# ||| / _--=> preempt-depth | ||
511 | -# |||| / | ||
512 | -# ||||| delay | ||
513 | -# cmd pid ||||| time | caller | ||
514 | -# \ / ||||| \ | / | ||
515 | - bash-4269 1...1 0us+: _spin_lock (copy_page_range) | ||
516 | - bash-4269 1...1 7us : _spin_unlock (copy_page_range) | ||
517 | - bash-4269 1...2 7us : trace_preempt_on (copy_page_range) | ||
518 | - | ||
519 | - | ||
520 | -vim:ft=help | ||
521 | - | ||
522 | -Here we see that we had a latency of 6 microsecs (which is | ||
523 | -very good). The spin_lock in copy_page_range disabled interrupts. | ||
524 | -The difference between the 6 and the displayed timestamp 7us is | ||
525 | -because the clock must have incremented between the time of recording | ||
526 | -the max latency and recording the function that had that latency. | ||
527 | - | ||
528 | -Note the above had ftrace_enabled not set. If we set the ftrace_enabled | ||
529 | -we get a much larger output: | ||
530 | - | ||
531 | -# tracer: irqsoff | ||
532 | -# | ||
533 | -irqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
534 | --------------------------------------------------------------------- | ||
535 | - latency: 50 us, #101/101, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
536 | - ----------------- | ||
537 | - | task: ls-4339 (uid:0 nice:0 policy:0 rt_prio:0) | ||
538 | - ----------------- | ||
539 | - => started at: __alloc_pages_internal | ||
540 | - => ended at: __alloc_pages_internal | ||
541 | - | ||
542 | -# _------=> CPU# | ||
543 | -# / _-----=> irqs-off | ||
544 | -# | / _----=> need-resched | ||
545 | -# || / _---=> hardirq/softirq | ||
546 | -# ||| / _--=> preempt-depth | ||
547 | -# |||| / | ||
548 | -# ||||| delay | ||
549 | -# cmd pid ||||| time | caller | ||
550 | -# \ / ||||| \ | / | ||
551 | - ls-4339 0...1 0us+: get_page_from_freelist (__alloc_pages_internal) | ||
552 | - ls-4339 0d..1 3us : rmqueue_bulk (get_page_from_freelist) | ||
553 | - ls-4339 0d..1 3us : _spin_lock (rmqueue_bulk) | ||
554 | - ls-4339 0d..1 4us : add_preempt_count (_spin_lock) | ||
555 | - ls-4339 0d..2 4us : __rmqueue (rmqueue_bulk) | ||
556 | - ls-4339 0d..2 5us : __rmqueue_smallest (__rmqueue) | ||
557 | - ls-4339 0d..2 5us : __mod_zone_page_state (__rmqueue_smallest) | ||
558 | - ls-4339 0d..2 6us : __rmqueue (rmqueue_bulk) | ||
559 | - ls-4339 0d..2 6us : __rmqueue_smallest (__rmqueue) | ||
560 | - ls-4339 0d..2 7us : __mod_zone_page_state (__rmqueue_smallest) | ||
561 | - ls-4339 0d..2 7us : __rmqueue (rmqueue_bulk) | ||
562 | - ls-4339 0d..2 8us : __rmqueue_smallest (__rmqueue) | ||
563 | -[...] | ||
564 | - ls-4339 0d..2 46us : __rmqueue_smallest (__rmqueue) | ||
565 | - ls-4339 0d..2 47us : __mod_zone_page_state (__rmqueue_smallest) | ||
566 | - ls-4339 0d..2 47us : __rmqueue (rmqueue_bulk) | ||
567 | - ls-4339 0d..2 48us : __rmqueue_smallest (__rmqueue) | ||
568 | - ls-4339 0d..2 48us : __mod_zone_page_state (__rmqueue_smallest) | ||
569 | - ls-4339 0d..2 49us : _spin_unlock (rmqueue_bulk) | ||
570 | - ls-4339 0d..2 49us : sub_preempt_count (_spin_unlock) | ||
571 | - ls-4339 0d..1 50us : get_page_from_freelist (__alloc_pages_internal) | ||
572 | - ls-4339 0d..2 51us : trace_hardirqs_on (__alloc_pages_internal) | ||
573 | - | ||
574 | - | ||
575 | -vim:ft=help | ||
576 | - | ||
577 | - | ||
578 | -Here we traced a 50 microsecond latency. But we also see all the | ||
579 | -functions that were called during that time. Note that by enabling | ||
580 | -function tracing, we endure an added overhead. This overhead may | ||
581 | -extend the latency times. Nevertheless, this trace has provided | ||
582 | -some very helpful debugging. | ||
583 | - | ||
584 | - | ||
585 | -preemptoff | ||
586 | ----------- | ||
587 | - | ||
588 | -When preemption is disabled we may be able to receive interrupts but | ||
589 | -the task can not be preempted and a higher priority task must wait | ||
590 | -for preemption to be enabled again before it can preempt a lower | ||
591 | -priority task. | ||
592 | - | ||
593 | -The preemptoff tracer traces the places that disable preemption. | ||
594 | -Like the irqsoff, it records the maximum latency that preemption | ||
595 | -was disabled. The control of preemptoff is much like the irqsoff. | ||
596 | - | ||
597 | - # echo preemptoff > /debug/tracing/current_tracer | ||
598 | - # echo 0 > /debug/tracing/tracing_max_latency | ||
599 | - # echo 1 > /debug/tracing/tracing_enabled | ||
600 | - # ls -ltr | ||
601 | - [...] | ||
602 | - # echo 0 > /debug/tracing/tracing_enabled | ||
603 | - # cat /debug/tracing/latency_trace | ||
604 | -# tracer: preemptoff | ||
605 | -# | ||
606 | -preemptoff latency trace v1.1.5 on 2.6.26-rc8 | ||
607 | --------------------------------------------------------------------- | ||
608 | - latency: 29 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
609 | - ----------------- | ||
610 | - | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | ||
611 | - ----------------- | ||
612 | - => started at: do_IRQ | ||
613 | - => ended at: __do_softirq | ||
614 | - | ||
615 | -# _------=> CPU# | ||
616 | -# / _-----=> irqs-off | ||
617 | -# | / _----=> need-resched | ||
618 | -# || / _---=> hardirq/softirq | ||
619 | -# ||| / _--=> preempt-depth | ||
620 | -# |||| / | ||
621 | -# ||||| delay | ||
622 | -# cmd pid ||||| time | caller | ||
623 | -# \ / ||||| \ | / | ||
624 | - sshd-4261 0d.h. 0us+: irq_enter (do_IRQ) | ||
625 | - sshd-4261 0d.s. 29us : _local_bh_enable (__do_softirq) | ||
626 | - sshd-4261 0d.s1 30us : trace_preempt_on (__do_softirq) | ||
627 | - | ||
628 | - | ||
629 | -vim:ft=help | ||
630 | - | ||
631 | -This has some more changes. Preemption was disabled when an interrupt | ||
632 | -came in (notice the 'h'), and was enabled while doing a softirq. | ||
633 | -(notice the 's'). But we also see that interrupts have been disabled | ||
634 | -when entering the preempt off section and leaving it (the 'd'). | ||
635 | -We do not know if interrupts were enabled in the mean time. | ||
636 | - | ||
637 | -# tracer: preemptoff | ||
638 | -# | ||
639 | -preemptoff latency trace v1.1.5 on 2.6.26-rc8 | ||
640 | --------------------------------------------------------------------- | ||
641 | - latency: 63 us, #87/87, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
642 | - ----------------- | ||
643 | - | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | ||
644 | - ----------------- | ||
645 | - => started at: remove_wait_queue | ||
646 | - => ended at: __do_softirq | ||
647 | - | ||
648 | -# _------=> CPU# | ||
649 | -# / _-----=> irqs-off | ||
650 | -# | / _----=> need-resched | ||
651 | -# || / _---=> hardirq/softirq | ||
652 | -# ||| / _--=> preempt-depth | ||
653 | -# |||| / | ||
654 | -# ||||| delay | ||
655 | -# cmd pid ||||| time | caller | ||
656 | -# \ / ||||| \ | / | ||
657 | - sshd-4261 0d..1 0us : _spin_lock_irqsave (remove_wait_queue) | ||
658 | - sshd-4261 0d..1 1us : _spin_unlock_irqrestore (remove_wait_queue) | ||
659 | - sshd-4261 0d..1 2us : do_IRQ (common_interrupt) | ||
660 | - sshd-4261 0d..1 2us : irq_enter (do_IRQ) | ||
661 | - sshd-4261 0d..1 2us : idle_cpu (irq_enter) | ||
662 | - sshd-4261 0d..1 3us : add_preempt_count (irq_enter) | ||
663 | - sshd-4261 0d.h1 3us : idle_cpu (irq_enter) | ||
664 | - sshd-4261 0d.h. 4us : handle_fasteoi_irq (do_IRQ) | ||
665 | -[...] | ||
666 | - sshd-4261 0d.h. 12us : add_preempt_count (_spin_lock) | ||
667 | - sshd-4261 0d.h1 12us : ack_ioapic_quirk_irq (handle_fasteoi_irq) | ||
668 | - sshd-4261 0d.h1 13us : move_native_irq (ack_ioapic_quirk_irq) | ||
669 | - sshd-4261 0d.h1 13us : _spin_unlock (handle_fasteoi_irq) | ||
670 | - sshd-4261 0d.h1 14us : sub_preempt_count (_spin_unlock) | ||
671 | - sshd-4261 0d.h1 14us : irq_exit (do_IRQ) | ||
672 | - sshd-4261 0d.h1 15us : sub_preempt_count (irq_exit) | ||
673 | - sshd-4261 0d..2 15us : do_softirq (irq_exit) | ||
674 | - sshd-4261 0d... 15us : __do_softirq (do_softirq) | ||
675 | - sshd-4261 0d... 16us : __local_bh_disable (__do_softirq) | ||
676 | - sshd-4261 0d... 16us+: add_preempt_count (__local_bh_disable) | ||
677 | - sshd-4261 0d.s4 20us : add_preempt_count (__local_bh_disable) | ||
678 | - sshd-4261 0d.s4 21us : sub_preempt_count (local_bh_enable) | ||
679 | - sshd-4261 0d.s5 21us : sub_preempt_count (local_bh_enable) | ||
680 | -[...] | ||
681 | - sshd-4261 0d.s6 41us : add_preempt_count (__local_bh_disable) | ||
682 | - sshd-4261 0d.s6 42us : sub_preempt_count (local_bh_enable) | ||
683 | - sshd-4261 0d.s7 42us : sub_preempt_count (local_bh_enable) | ||
684 | - sshd-4261 0d.s5 43us : add_preempt_count (__local_bh_disable) | ||
685 | - sshd-4261 0d.s5 43us : sub_preempt_count (local_bh_enable_ip) | ||
686 | - sshd-4261 0d.s6 44us : sub_preempt_count (local_bh_enable_ip) | ||
687 | - sshd-4261 0d.s5 44us : add_preempt_count (__local_bh_disable) | ||
688 | - sshd-4261 0d.s5 45us : sub_preempt_count (local_bh_enable) | ||
689 | -[...] | ||
690 | - sshd-4261 0d.s. 63us : _local_bh_enable (__do_softirq) | ||
691 | - sshd-4261 0d.s1 64us : trace_preempt_on (__do_softirq) | ||
692 | - | ||
693 | - | ||
694 | -The above is an example of the preemptoff trace with ftrace_enabled | ||
695 | -set. Here we see that interrupts were disabled the entire time. | ||
696 | -The irq_enter code lets us know that we entered an interrupt 'h'. | ||
697 | -Before that, the functions being traced still show that it is not | ||
698 | -in an interrupt, but we can see by the functions themselves that | ||
699 | -this is not the case. | ||
700 | - | ||
701 | -Notice that the __do_softirq when called doesn't have a preempt_count. | ||
702 | -It may seem that we missed a preempt enabled. What really happened | ||
703 | -is that the preempt count is held on the threads stack and we | ||
704 | -switched to the softirq stack (4K stacks in effect). The code | ||
705 | -does not copy the preempt count, but because interrupts are disabled | ||
706 | -we don't need to worry about it. Having a tracer like this is good | ||
707 | -to let people know what really happens inside the kernel. | ||
708 | - | ||
709 | - | ||
710 | -preemptirqsoff | ||
711 | --------------- | ||
712 | - | ||
713 | -Knowing the locations that have interrupts disabled or preemption | ||
714 | -disabled for the longest times is helpful. But sometimes we would | ||
715 | -like to know when either preemption and/or interrupts are disabled. | ||
716 | - | ||
717 | -The following code: | ||
718 | - | ||
719 | - local_irq_disable(); | ||
720 | - call_function_with_irqs_off(); | ||
721 | - preempt_disable(); | ||
722 | - call_function_with_irqs_and_preemption_off(); | ||
723 | - local_irq_enable(); | ||
724 | - call_function_with_preemption_off(); | ||
725 | - preempt_enable(); | ||
726 | - | ||
727 | -The irqsoff tracer will record the total length of | ||
728 | -call_function_with_irqs_off() and | ||
729 | -call_function_with_irqs_and_preemption_off(). | ||
730 | - | ||
731 | -The preemptoff tracer will record the total length of | ||
732 | -call_function_with_irqs_and_preemption_off() and | ||
733 | -call_function_with_preemption_off(). | ||
734 | - | ||
735 | -But neither will trace the time that interrupts and/or preemption | ||
736 | -is disabled. This total time is the time that we can not schedule. | ||
737 | -To record this time, use the preemptirqsoff tracer. | ||
738 | - | ||
739 | -Again, using this trace is much like the irqsoff and preemptoff tracers. | ||
740 | - | ||
741 | - # echo preemptoff > /debug/tracing/current_tracer | ||
742 | - # echo 0 > /debug/tracing/tracing_max_latency | ||
743 | - # echo 1 > /debug/tracing/tracing_enabled | ||
744 | - # ls -ltr | ||
745 | - [...] | ||
746 | - # echo 0 > /debug/tracing/tracing_enabled | ||
747 | - # cat /debug/tracing/latency_trace | ||
748 | -# tracer: preemptirqsoff | ||
749 | -# | ||
750 | -preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
751 | --------------------------------------------------------------------- | ||
752 | - latency: 293 us, #3/3, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
753 | - ----------------- | ||
754 | - | task: ls-4860 (uid:0 nice:0 policy:0 rt_prio:0) | ||
755 | - ----------------- | ||
756 | - => started at: apic_timer_interrupt | ||
757 | - => ended at: __do_softirq | ||
758 | - | ||
759 | -# _------=> CPU# | ||
760 | -# / _-----=> irqs-off | ||
761 | -# | / _----=> need-resched | ||
762 | -# || / _---=> hardirq/softirq | ||
763 | -# ||| / _--=> preempt-depth | ||
764 | -# |||| / | ||
765 | -# ||||| delay | ||
766 | -# cmd pid ||||| time | caller | ||
767 | -# \ / ||||| \ | / | ||
768 | - ls-4860 0d... 0us!: trace_hardirqs_off_thunk (apic_timer_interrupt) | ||
769 | - ls-4860 0d.s. 294us : _local_bh_enable (__do_softirq) | ||
770 | - ls-4860 0d.s1 294us : trace_preempt_on (__do_softirq) | ||
771 | - | ||
772 | - | ||
773 | -vim:ft=help | ||
774 | - | ||
775 | - | ||
776 | -The trace_hardirqs_off_thunk is called from assembly on x86 when | ||
777 | -interrupts are disabled in the assembly code. Without the function | ||
778 | -tracing, we don't know if interrupts were enabled within the preemption | ||
779 | -points. We do see that it started with preemption enabled. | ||
780 | - | ||
781 | -Here is a trace with ftrace_enabled set: | ||
782 | - | ||
783 | - | ||
784 | -# tracer: preemptirqsoff | ||
785 | -# | ||
786 | -preemptirqsoff latency trace v1.1.5 on 2.6.26-rc8 | ||
787 | --------------------------------------------------------------------- | ||
788 | - latency: 105 us, #183/183, CPU#0 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
789 | - ----------------- | ||
790 | - | task: sshd-4261 (uid:0 nice:0 policy:0 rt_prio:0) | ||
791 | - ----------------- | ||
792 | - => started at: write_chan | ||
793 | - => ended at: __do_softirq | ||
794 | - | ||
795 | -# _------=> CPU# | ||
796 | -# / _-----=> irqs-off | ||
797 | -# | / _----=> need-resched | ||
798 | -# || / _---=> hardirq/softirq | ||
799 | -# ||| / _--=> preempt-depth | ||
800 | -# |||| / | ||
801 | -# ||||| delay | ||
802 | -# cmd pid ||||| time | caller | ||
803 | -# \ / ||||| \ | / | ||
804 | - ls-4473 0.N.. 0us : preempt_schedule (write_chan) | ||
805 | - ls-4473 0dN.1 1us : _spin_lock (schedule) | ||
806 | - ls-4473 0dN.1 2us : add_preempt_count (_spin_lock) | ||
807 | - ls-4473 0d..2 2us : put_prev_task_fair (schedule) | ||
808 | -[...] | ||
809 | - ls-4473 0d..2 13us : set_normalized_timespec (ktime_get_ts) | ||
810 | - ls-4473 0d..2 13us : __switch_to (schedule) | ||
811 | - sshd-4261 0d..2 14us : finish_task_switch (schedule) | ||
812 | - sshd-4261 0d..2 14us : _spin_unlock_irq (finish_task_switch) | ||
813 | - sshd-4261 0d..1 15us : add_preempt_count (_spin_lock_irqsave) | ||
814 | - sshd-4261 0d..2 16us : _spin_unlock_irqrestore (hrtick_set) | ||
815 | - sshd-4261 0d..2 16us : do_IRQ (common_interrupt) | ||
816 | - sshd-4261 0d..2 17us : irq_enter (do_IRQ) | ||
817 | - sshd-4261 0d..2 17us : idle_cpu (irq_enter) | ||
818 | - sshd-4261 0d..2 18us : add_preempt_count (irq_enter) | ||
819 | - sshd-4261 0d.h2 18us : idle_cpu (irq_enter) | ||
820 | - sshd-4261 0d.h. 18us : handle_fasteoi_irq (do_IRQ) | ||
821 | - sshd-4261 0d.h. 19us : _spin_lock (handle_fasteoi_irq) | ||
822 | - sshd-4261 0d.h. 19us : add_preempt_count (_spin_lock) | ||
823 | - sshd-4261 0d.h1 20us : _spin_unlock (handle_fasteoi_irq) | ||
824 | - sshd-4261 0d.h1 20us : sub_preempt_count (_spin_unlock) | ||
825 | -[...] | ||
826 | - sshd-4261 0d.h1 28us : _spin_unlock (handle_fasteoi_irq) | ||
827 | - sshd-4261 0d.h1 29us : sub_preempt_count (_spin_unlock) | ||
828 | - sshd-4261 0d.h2 29us : irq_exit (do_IRQ) | ||
829 | - sshd-4261 0d.h2 29us : sub_preempt_count (irq_exit) | ||
830 | - sshd-4261 0d..3 30us : do_softirq (irq_exit) | ||
831 | - sshd-4261 0d... 30us : __do_softirq (do_softirq) | ||
832 | - sshd-4261 0d... 31us : __local_bh_disable (__do_softirq) | ||
833 | - sshd-4261 0d... 31us+: add_preempt_count (__local_bh_disable) | ||
834 | - sshd-4261 0d.s4 34us : add_preempt_count (__local_bh_disable) | ||
835 | -[...] | ||
836 | - sshd-4261 0d.s3 43us : sub_preempt_count (local_bh_enable_ip) | ||
837 | - sshd-4261 0d.s4 44us : sub_preempt_count (local_bh_enable_ip) | ||
838 | - sshd-4261 0d.s3 44us : smp_apic_timer_interrupt (apic_timer_interrupt) | ||
839 | - sshd-4261 0d.s3 45us : irq_enter (smp_apic_timer_interrupt) | ||
840 | - sshd-4261 0d.s3 45us : idle_cpu (irq_enter) | ||
841 | - sshd-4261 0d.s3 46us : add_preempt_count (irq_enter) | ||
842 | - sshd-4261 0d.H3 46us : idle_cpu (irq_enter) | ||
843 | - sshd-4261 0d.H3 47us : hrtimer_interrupt (smp_apic_timer_interrupt) | ||
844 | - sshd-4261 0d.H3 47us : ktime_get (hrtimer_interrupt) | ||
845 | -[...] | ||
846 | - sshd-4261 0d.H3 81us : tick_program_event (hrtimer_interrupt) | ||
847 | - sshd-4261 0d.H3 82us : ktime_get (tick_program_event) | ||
848 | - sshd-4261 0d.H3 82us : ktime_get_ts (ktime_get) | ||
849 | - sshd-4261 0d.H3 83us : getnstimeofday (ktime_get_ts) | ||
850 | - sshd-4261 0d.H3 83us : set_normalized_timespec (ktime_get_ts) | ||
851 | - sshd-4261 0d.H3 84us : clockevents_program_event (tick_program_event) | ||
852 | - sshd-4261 0d.H3 84us : lapic_next_event (clockevents_program_event) | ||
853 | - sshd-4261 0d.H3 85us : irq_exit (smp_apic_timer_interrupt) | ||
854 | - sshd-4261 0d.H3 85us : sub_preempt_count (irq_exit) | ||
855 | - sshd-4261 0d.s4 86us : sub_preempt_count (irq_exit) | ||
856 | - sshd-4261 0d.s3 86us : add_preempt_count (__local_bh_disable) | ||
857 | -[...] | ||
858 | - sshd-4261 0d.s1 98us : sub_preempt_count (net_rx_action) | ||
859 | - sshd-4261 0d.s. 99us : add_preempt_count (_spin_lock_irq) | ||
860 | - sshd-4261 0d.s1 99us+: _spin_unlock_irq (run_timer_softirq) | ||
861 | - sshd-4261 0d.s. 104us : _local_bh_enable (__do_softirq) | ||
862 | - sshd-4261 0d.s. 104us : sub_preempt_count (_local_bh_enable) | ||
863 | - sshd-4261 0d.s. 105us : _local_bh_enable (__do_softirq) | ||
864 | - sshd-4261 0d.s1 105us : trace_preempt_on (__do_softirq) | ||
865 | - | ||
866 | - | ||
867 | -This is a very interesting trace. It started with the preemption of | ||
868 | -the ls task. We see that the task had the "need_resched" bit set | ||
869 | -with the 'N' in the trace. Interrupts are disabled in the spin_lock | ||
870 | -and the trace started. We see that a schedule took place to run | ||
871 | -sshd. When the interrupts were enabled we took an interrupt. | ||
872 | -On return of the interrupt the softirq ran. We took another interrupt | ||
873 | -while running the softirq as we see with the capital 'H'. | ||
874 | - | ||
875 | - | ||
876 | -wakeup | ||
877 | ------- | ||
878 | - | ||
879 | -In Real-Time environment it is very important to know the wakeup | ||
880 | -time it takes for the highest priority task that wakes up to the | ||
881 | -time it executes. This is also known as "schedule latency". | ||
882 | -I stress the point that this is about RT tasks. It is also important | ||
883 | -to know the scheduling latency of non-RT tasks, but the average | ||
884 | -schedule latency is better for non-RT tasks. Tools like | ||
885 | -LatencyTop is more appropriate for such measurements. | ||
886 | - | ||
887 | -Real-Time environments is interested in the worst case latency. | ||
888 | -That is the longest latency it takes for something to happen, and | ||
889 | -not the average. We can have a very fast scheduler that may only | ||
890 | -have a large latency once in a while, but that would not work well | ||
891 | -with Real-Time tasks. The wakeup tracer was designed to record | ||
892 | -the worst case wakeups of RT tasks. Non-RT tasks are not recorded | ||
893 | -because the tracer only records one worst case and tracing non-RT | ||
894 | -tasks that are unpredictable will overwrite the worst case latency | ||
895 | -of RT tasks. | ||
896 | - | ||
897 | -Since this tracer only deals with RT tasks, we will run this slightly | ||
898 | -different than we did with the previous tracers. Instead of performing | ||
899 | -an 'ls' we will run 'sleep 1' under 'chrt' which changes the | ||
900 | -priority of the task. | ||
901 | - | ||
902 | - # echo wakeup > /debug/tracing/current_tracer | ||
903 | - # echo 0 > /debug/tracing/tracing_max_latency | ||
904 | - # echo 1 > /debug/tracing/tracing_enabled | ||
905 | - # chrt -f 5 sleep 1 | ||
906 | - # echo 0 > /debug/tracing/tracing_enabled | ||
907 | - # cat /debug/tracing/latency_trace | ||
908 | -# tracer: wakeup | ||
909 | -# | ||
910 | -wakeup latency trace v1.1.5 on 2.6.26-rc8 | ||
911 | --------------------------------------------------------------------- | ||
912 | - latency: 4 us, #2/2, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
913 | - ----------------- | ||
914 | - | task: sleep-4901 (uid:0 nice:0 policy:1 rt_prio:5) | ||
915 | - ----------------- | ||
916 | - | ||
917 | -# _------=> CPU# | ||
918 | -# / _-----=> irqs-off | ||
919 | -# | / _----=> need-resched | ||
920 | -# || / _---=> hardirq/softirq | ||
921 | -# ||| / _--=> preempt-depth | ||
922 | -# |||| / | ||
923 | -# ||||| delay | ||
924 | -# cmd pid ||||| time | caller | ||
925 | -# \ / ||||| \ | / | ||
926 | - <idle>-0 1d.h4 0us+: try_to_wake_up (wake_up_process) | ||
927 | - <idle>-0 1d..4 4us : schedule (cpu_idle) | ||
928 | - | ||
929 | - | ||
930 | -vim:ft=help | ||
931 | - | ||
932 | - | ||
933 | -Running this on an idle system we see that it only took 4 microseconds | ||
934 | -to perform the task switch. Note, since the trace marker in the | ||
935 | -schedule is before the actual "switch" we stop the tracing when | ||
936 | -the recorded task is about to schedule in. This may change if | ||
937 | -we add a new marker at the end of the scheduler. | ||
938 | - | ||
939 | -Notice that the recorded task is 'sleep' with the PID of 4901 and it | ||
940 | -has an rt_prio of 5. This priority is user-space priority and not | ||
941 | -the internal kernel priority. The policy is 1 for SCHED_FIFO and 2 | ||
942 | -for SCHED_RR. | ||
943 | - | ||
944 | -Doing the same with chrt -r 5 and ftrace_enabled set. | ||
945 | - | ||
946 | -# tracer: wakeup | ||
947 | -# | ||
948 | -wakeup latency trace v1.1.5 on 2.6.26-rc8 | ||
949 | --------------------------------------------------------------------- | ||
950 | - latency: 50 us, #60/60, CPU#1 | (M:preempt VP:0, KP:0, SP:0 HP:0 #P:2) | ||
951 | - ----------------- | ||
952 | - | task: sleep-4068 (uid:0 nice:0 policy:2 rt_prio:5) | ||
953 | - ----------------- | ||
954 | - | ||
955 | -# _------=> CPU# | ||
956 | -# / _-----=> irqs-off | ||
957 | -# | / _----=> need-resched | ||
958 | -# || / _---=> hardirq/softirq | ||
959 | -# ||| / _--=> preempt-depth | ||
960 | -# |||| / | ||
961 | -# ||||| delay | ||
962 | -# cmd pid ||||| time | caller | ||
963 | -# \ / ||||| \ | / | ||
964 | -ksoftirq-7 1d.H3 0us : try_to_wake_up (wake_up_process) | ||
965 | -ksoftirq-7 1d.H4 1us : sub_preempt_count (marker_probe_cb) | ||
966 | -ksoftirq-7 1d.H3 2us : check_preempt_wakeup (try_to_wake_up) | ||
967 | -ksoftirq-7 1d.H3 3us : update_curr (check_preempt_wakeup) | ||
968 | -ksoftirq-7 1d.H3 4us : calc_delta_mine (update_curr) | ||
969 | -ksoftirq-7 1d.H3 5us : __resched_task (check_preempt_wakeup) | ||
970 | -ksoftirq-7 1d.H3 6us : task_wake_up_rt (try_to_wake_up) | ||
971 | -ksoftirq-7 1d.H3 7us : _spin_unlock_irqrestore (try_to_wake_up) | ||
972 | -[...] | ||
973 | -ksoftirq-7 1d.H2 17us : irq_exit (smp_apic_timer_interrupt) | ||
974 | -ksoftirq-7 1d.H2 18us : sub_preempt_count (irq_exit) | ||
975 | -ksoftirq-7 1d.s3 19us : sub_preempt_count (irq_exit) | ||
976 | -ksoftirq-7 1..s2 20us : rcu_process_callbacks (__do_softirq) | ||
977 | -[...] | ||
978 | -ksoftirq-7 1..s2 26us : __rcu_process_callbacks (rcu_process_callbacks) | ||
979 | -ksoftirq-7 1d.s2 27us : _local_bh_enable (__do_softirq) | ||
980 | -ksoftirq-7 1d.s2 28us : sub_preempt_count (_local_bh_enable) | ||
981 | -ksoftirq-7 1.N.3 29us : sub_preempt_count (ksoftirqd) | ||
982 | -ksoftirq-7 1.N.2 30us : _cond_resched (ksoftirqd) | ||
983 | -ksoftirq-7 1.N.2 31us : __cond_resched (_cond_resched) | ||
984 | -ksoftirq-7 1.N.2 32us : add_preempt_count (__cond_resched) | ||
985 | -ksoftirq-7 1.N.2 33us : schedule (__cond_resched) | ||
986 | -ksoftirq-7 1.N.2 33us : add_preempt_count (schedule) | ||
987 | -ksoftirq-7 1.N.3 34us : hrtick_clear (schedule) | ||
988 | -ksoftirq-7 1dN.3 35us : _spin_lock (schedule) | ||
989 | -ksoftirq-7 1dN.3 36us : add_preempt_count (_spin_lock) | ||
990 | -ksoftirq-7 1d..4 37us : put_prev_task_fair (schedule) | ||
991 | -ksoftirq-7 1d..4 38us : update_curr (put_prev_task_fair) | ||
992 | -[...] | ||
993 | -ksoftirq-7 1d..5 47us : _spin_trylock (tracing_record_cmdline) | ||
994 | -ksoftirq-7 1d..5 48us : add_preempt_count (_spin_trylock) | ||
995 | -ksoftirq-7 1d..6 49us : _spin_unlock (tracing_record_cmdline) | ||
996 | -ksoftirq-7 1d..6 49us : sub_preempt_count (_spin_unlock) | ||
997 | -ksoftirq-7 1d..4 50us : schedule (__cond_resched) | ||
998 | - | ||
999 | -The interrupt went off while running ksoftirqd. This task runs at | ||
1000 | -SCHED_OTHER. Why didn't we see the 'N' set early? This may be | ||
1001 | -a harmless bug with x86_32 and 4K stacks. The need_reched() function | ||
1002 | -that tests if we need to reschedule looks on the actual stack. | ||
1003 | -Where as the setting of the NEED_RESCHED bit happens on the | ||
1004 | -task's stack. But because we are in a hard interrupt, the test | ||
1005 | -is with the interrupts stack which has that to be false. We don't | ||
1006 | -see the 'N' until we switch back to the task's stack. | ||
1007 | - | ||
1008 | -ftrace | ||
1009 | ------- | ||
1010 | - | ||
1011 | -ftrace is not only the name of the tracing infrastructure, but it | ||
1012 | -is also a name of one of the tracers. The tracer is the function | ||
1013 | -tracer. Enabling the function tracer can be done from the | ||
1014 | -debug file system. Make sure the ftrace_enabled is set otherwise | ||
1015 | -this tracer is a nop. | ||
1016 | - | ||
1017 | - # sysctl kernel.ftrace_enabled=1 | ||
1018 | - # echo ftrace > /debug/tracing/current_tracer | ||
1019 | - # echo 1 > /debug/tracing/tracing_enabled | ||
1020 | - # usleep 1 | ||
1021 | - # echo 0 > /debug/tracing/tracing_enabled | ||
1022 | - # cat /debug/tracing/trace | ||
1023 | -# tracer: ftrace | ||
1024 | -# | ||
1025 | -# TASK-PID CPU# TIMESTAMP FUNCTION | ||
1026 | -# | | | | | | ||
1027 | - bash-4003 [00] 123.638713: finish_task_switch <-schedule | ||
1028 | - bash-4003 [00] 123.638714: _spin_unlock_irq <-finish_task_switch | ||
1029 | - bash-4003 [00] 123.638714: sub_preempt_count <-_spin_unlock_irq | ||
1030 | - bash-4003 [00] 123.638715: hrtick_set <-schedule | ||
1031 | - bash-4003 [00] 123.638715: _spin_lock_irqsave <-hrtick_set | ||
1032 | - bash-4003 [00] 123.638716: add_preempt_count <-_spin_lock_irqsave | ||
1033 | - bash-4003 [00] 123.638716: _spin_unlock_irqrestore <-hrtick_set | ||
1034 | - bash-4003 [00] 123.638717: sub_preempt_count <-_spin_unlock_irqrestore | ||
1035 | - bash-4003 [00] 123.638717: hrtick_clear <-hrtick_set | ||
1036 | - bash-4003 [00] 123.638718: sub_preempt_count <-schedule | ||
1037 | - bash-4003 [00] 123.638718: sub_preempt_count <-preempt_schedule | ||
1038 | - bash-4003 [00] 123.638719: wait_for_completion <-__stop_machine_run | ||
1039 | - bash-4003 [00] 123.638719: wait_for_common <-wait_for_completion | ||
1040 | - bash-4003 [00] 123.638720: _spin_lock_irq <-wait_for_common | ||
1041 | - bash-4003 [00] 123.638720: add_preempt_count <-_spin_lock_irq | ||
1042 | -[...] | ||
1043 | - | ||
1044 | - | ||
1045 | -Note: It is sometimes better to enable or disable tracing directly from | ||
1046 | -a program, because the buffer may be overflowed by the echo commands | ||
1047 | -before you get to the point you want to trace. It is also easier to | ||
1048 | -stop the tracing at the point that you hit the part that you are | ||
1049 | -interested in. Since the ftrace buffer is a ring buffer with the | ||
1050 | -oldest data being overwritten, usually it is sufficient to start the | ||
1051 | -tracer with an echo command but have you code stop it. Something | ||
1052 | -like the following is usually appropriate for this. | ||
1053 | - | ||
1054 | -int trace_fd; | ||
1055 | -[...] | ||
1056 | -int main(int argc, char *argv[]) { | ||
1057 | - [...] | ||
1058 | - trace_fd = open("/debug/tracing/tracing_enabled", O_WRONLY); | ||
1059 | - [...] | ||
1060 | - if (condition_hit()) { | ||
1061 | - write(trace_fd, "0", 1); | ||
1062 | - } | ||
1063 | - [...] | ||
1064 | -} | ||
1065 | - | ||
1066 | - | ||
1067 | -dynamic ftrace | ||
1068 | --------------- | ||
1069 | - | ||
1070 | -If CONFIG_DYNAMIC_FTRACE is set, then the system will run with | ||
1071 | -virtually no overhead when function tracing is disabled. The way | ||
1072 | -this works is the mcount function call (placed at the start of | ||
1073 | -every kernel function, produced by the -pg switch in gcc), starts | ||
1074 | -of pointing to a simple return. | ||
1075 | - | ||
1076 | -When dynamic ftrace is initialized, it calls kstop_machine to make it | ||
1077 | -act like a uniprocessor so that it can freely modify code without | ||
1078 | -worrying about other processors executing that same code. At | ||
1079 | -initialization, the mcount calls are change to call a "record_ip" | ||
1080 | -function. After this, the first time a kernel function is called, | ||
1081 | -it has the calling address saved in a hash table. | ||
1082 | - | ||
1083 | -Later on the ftraced kernel thread is awoken and will again call | ||
1084 | -kstop_machine if new functions have been recorded. The ftraced thread | ||
1085 | -will change all calls to mcount to "nop". Just calling mcount | ||
1086 | -and having mcount return has shown a 10% overhead. By converting | ||
1087 | -it to a nop, there is no recordable overhead to the system. | ||
1088 | - | ||
1089 | -One special side-effect to the recording of the functions being | ||
1090 | -traced, is that we can now selectively choose which functions we | ||
1091 | -want to trace and which ones we want the mcount calls to remain as | ||
1092 | -nops. | ||
1093 | - | ||
1094 | -Two files that contain to the enabling and disabling of recorded | ||
1095 | -functions are: | ||
1096 | - | ||
1097 | - set_ftrace_filter | ||
1098 | - | ||
1099 | -and | ||
1100 | - | ||
1101 | - set_ftrace_notrace | ||
1102 | - | ||
1103 | -A list of available functions that you can add to this files is listed | ||
1104 | -in: | ||
1105 | - | ||
1106 | - available_filter_functions | ||
1107 | - | ||
1108 | - # cat /debug/tracing/available_filter_functions | ||
1109 | -put_prev_task_idle | ||
1110 | -kmem_cache_create | ||
1111 | -pick_next_task_rt | ||
1112 | -get_online_cpus | ||
1113 | -pick_next_task_fair | ||
1114 | -mutex_lock | ||
1115 | -[...] | ||
1116 | - | ||
1117 | -If I'm only interested in sys_nanosleep and hrtimer_interrupt: | ||
1118 | - | ||
1119 | - # echo sys_nanosleep hrtimer_interrupt \ | ||
1120 | - > /debug/tracing/set_ftrace_filter | ||
1121 | - # echo ftrace > /debug/tracing/current_tracer | ||
1122 | - # echo 1 > /debug/tracing/tracing_enabled | ||
1123 | - # usleep 1 | ||
1124 | - # echo 0 > /debug/tracing/tracing_enabled | ||
1125 | - # cat /debug/tracing/trace | ||
1126 | -# tracer: ftrace | ||
1127 | -# | ||
1128 | -# TASK-PID CPU# TIMESTAMP FUNCTION | ||
1129 | -# | | | | | | ||
1130 | - usleep-4134 [00] 1317.070017: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
1131 | - usleep-4134 [00] 1317.070111: sys_nanosleep <-syscall_call | ||
1132 | - <idle>-0 [00] 1317.070115: hrtimer_interrupt <-smp_apic_timer_interrupt | ||
1133 | - | ||
1134 | -To see what functions are being traced, you can cat the file: | ||
1135 | - | ||
1136 | - # cat /debug/tracing/set_ftrace_filter | ||
1137 | -hrtimer_interrupt | ||
1138 | -sys_nanosleep | ||
1139 | - | ||
1140 | - | ||
1141 | -Perhaps this isn't enough. The filters also allow simple wild cards. | ||
1142 | -Only the following is currently available | ||
1143 | - | ||
1144 | - <match>* - will match functions that begins with <match> | ||
1145 | - *<match> - will match functions that end with <match> | ||
1146 | - *<match>* - will match functions that have <match> in it | ||
1147 | - | ||
1148 | -Thats all the wild cards that are allowed. | ||
1149 | - | ||
1150 | - <match>*<match> will not work. | ||
1151 | - | ||
1152 | - # echo hrtimer_* > /debug/tracing/set_ftrace_filter | ||
1153 | - | ||
1154 | -Produces: | ||
1155 | - | ||
1156 | -# tracer: ftrace | ||
1157 | -# | ||
1158 | -# TASK-PID CPU# TIMESTAMP FUNCTION | ||
1159 | -# | | | | | | ||
1160 | - bash-4003 [00] 1480.611794: hrtimer_init <-copy_process | ||
1161 | - bash-4003 [00] 1480.611941: hrtimer_start <-hrtick_set | ||
1162 | - bash-4003 [00] 1480.611956: hrtimer_cancel <-hrtick_clear | ||
1163 | - bash-4003 [00] 1480.611956: hrtimer_try_to_cancel <-hrtimer_cancel | ||
1164 | - <idle>-0 [00] 1480.612019: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1165 | - <idle>-0 [00] 1480.612025: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1166 | - <idle>-0 [00] 1480.612032: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1167 | - <idle>-0 [00] 1480.612037: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1168 | - <idle>-0 [00] 1480.612382: hrtimer_get_next_event <-get_next_timer_interrupt | ||
1169 | - | ||
1170 | - | ||
1171 | -Notice that we lost the sys_nanosleep. | ||
1172 | - | ||
1173 | - # cat /debug/tracing/set_ftrace_filter | ||
1174 | -hrtimer_run_queues | ||
1175 | -hrtimer_run_pending | ||
1176 | -hrtimer_init | ||
1177 | -hrtimer_cancel | ||
1178 | -hrtimer_try_to_cancel | ||
1179 | -hrtimer_forward | ||
1180 | -hrtimer_start | ||
1181 | -hrtimer_reprogram | ||
1182 | -hrtimer_force_reprogram | ||
1183 | -hrtimer_get_next_event | ||
1184 | -hrtimer_interrupt | ||
1185 | -hrtimer_nanosleep | ||
1186 | -hrtimer_wakeup | ||
1187 | -hrtimer_get_remaining | ||
1188 | -hrtimer_get_res | ||
1189 | -hrtimer_init_sleeper | ||
1190 | - | ||
1191 | - | ||
1192 | -This is because the '>' and '>>' act just like they do in bash. | ||
1193 | -To rewrite the filters, use '>' | ||
1194 | -To append to the filters, use '>>' | ||
1195 | - | ||
1196 | -To clear out a filter so that all functions will be recorded again. | ||
1197 | - | ||
1198 | - # echo > /debug/tracing/set_ftrace_filter | ||
1199 | - # cat /debug/tracing/set_ftrace_filter | ||
1200 | - # | ||
1201 | - | ||
1202 | -Again, now we want to append. | ||
1203 | - | ||
1204 | - # echo sys_nanosleep > /debug/tracing/set_ftrace_filter | ||
1205 | - # cat /debug/tracing/set_ftrace_filter | ||
1206 | -sys_nanosleep | ||
1207 | - # echo hrtimer_* >> /debug/tracing/set_ftrace_filter | ||
1208 | - # cat /debug/tracing/set_ftrace_filter | ||
1209 | -hrtimer_run_queues | ||
1210 | -hrtimer_run_pending | ||
1211 | -hrtimer_init | ||
1212 | -hrtimer_cancel | ||
1213 | -hrtimer_try_to_cancel | ||
1214 | -hrtimer_forward | ||
1215 | -hrtimer_start | ||
1216 | -hrtimer_reprogram | ||
1217 | -hrtimer_force_reprogram | ||
1218 | -hrtimer_get_next_event | ||
1219 | -hrtimer_interrupt | ||
1220 | -sys_nanosleep | ||
1221 | -hrtimer_nanosleep | ||
1222 | -hrtimer_wakeup | ||
1223 | -hrtimer_get_remaining | ||
1224 | -hrtimer_get_res | ||
1225 | -hrtimer_init_sleeper | ||
1226 | - | ||
1227 | - | ||
1228 | -The set_ftrace_notrace prevents those functions from being traced. | ||
1229 | - | ||
1230 | - # echo '*preempt*' '*lock*' > /debug/tracing/set_ftrace_notrace | ||
1231 | - | ||
1232 | -Produces: | ||
1233 | - | ||
1234 | -# tracer: ftrace | ||
1235 | -# | ||
1236 | -# TASK-PID CPU# TIMESTAMP FUNCTION | ||
1237 | -# | | | | | | ||
1238 | - bash-4043 [01] 115.281644: finish_task_switch <-schedule | ||
1239 | - bash-4043 [01] 115.281645: hrtick_set <-schedule | ||
1240 | - bash-4043 [01] 115.281645: hrtick_clear <-hrtick_set | ||
1241 | - bash-4043 [01] 115.281646: wait_for_completion <-__stop_machine_run | ||
1242 | - bash-4043 [01] 115.281647: wait_for_common <-wait_for_completion | ||
1243 | - bash-4043 [01] 115.281647: kthread_stop <-stop_machine_run | ||
1244 | - bash-4043 [01] 115.281648: init_waitqueue_head <-kthread_stop | ||
1245 | - bash-4043 [01] 115.281648: wake_up_process <-kthread_stop | ||
1246 | - bash-4043 [01] 115.281649: try_to_wake_up <-wake_up_process | ||
1247 | - | ||
1248 | -We can see that there's no more lock or preempt tracing. | ||
1249 | - | ||
1250 | -ftraced | ||
1251 | -------- | ||
1252 | - | ||
1253 | -As mentioned above, when dynamic ftrace is configured in, a kernel | ||
1254 | -thread wakes up once a second and checks to see if there are mcount | ||
1255 | -calls that need to be converted into nops. If there is not, then | ||
1256 | -it simply goes back to sleep. But if there is, it will call | ||
1257 | -kstop_machine to convert the calls to nops. | ||
1258 | - | ||
1259 | -There may be a case that you do not want this added latency. | ||
1260 | -Perhaps you are doing some audio recording and this activity might | ||
1261 | -cause skips in the playback. There is an interface to disable | ||
1262 | -and enable the ftraced kernel thread. | ||
1263 | - | ||
1264 | - # echo 0 > /debug/tracing/ftraced_enabled | ||
1265 | - | ||
1266 | -This will disable the calling of the kstop_machine to update the | ||
1267 | -mcount calls to nops. Remember that there's a large overhead | ||
1268 | -to calling mcount. Without this kernel thread, that overhead will | ||
1269 | -exist. | ||
1270 | - | ||
1271 | -Any write to the ftraced_enabled file will cause the kstop_machine | ||
1272 | -to run if there are recorded calls to mcount. This means that a | ||
1273 | -user can manually perform the updates when they want to by simply | ||
1274 | -echoing a '0' into the ftraced_enabled file. | ||
1275 | - | ||
1276 | -The updates are also done at the beginning of enabling a tracer | ||
1277 | -that uses ftrace function recording. | ||
1278 | - | ||
1279 | - | ||
1280 | -trace_pipe | ||
1281 | ----------- | ||
1282 | - | ||
1283 | -The trace_pipe outputs the same as trace, but the effect on the | ||
1284 | -tracing is different. Every read from trace_pipe is consumed. | ||
1285 | -This means that subsequent reads will be different. The trace | ||
1286 | -is live. | ||
1287 | - | ||
1288 | - # echo ftrace > /debug/tracing/current_tracer | ||
1289 | - # cat /debug/tracing/trace_pipe > /tmp/trace.out & | ||
1290 | -[1] 4153 | ||
1291 | - # echo 1 > /debug/tracing/tracing_enabled | ||
1292 | - # usleep 1 | ||
1293 | - # echo 0 > /debug/tracing/tracing_enabled | ||
1294 | - # cat /debug/tracing/trace | ||
1295 | -# tracer: ftrace | ||
1296 | -# | ||
1297 | -# TASK-PID CPU# TIMESTAMP FUNCTION | ||
1298 | -# | | | | | | ||
1299 | - | ||
1300 | - # | ||
1301 | - # cat /tmp/trace.out | ||
1302 | - bash-4043 [00] 41.267106: finish_task_switch <-schedule | ||
1303 | - bash-4043 [00] 41.267106: hrtick_set <-schedule | ||
1304 | - bash-4043 [00] 41.267107: hrtick_clear <-hrtick_set | ||
1305 | - bash-4043 [00] 41.267108: wait_for_completion <-__stop_machine_run | ||
1306 | - bash-4043 [00] 41.267108: wait_for_common <-wait_for_completion | ||
1307 | - bash-4043 [00] 41.267109: kthread_stop <-stop_machine_run | ||
1308 | - bash-4043 [00] 41.267109: init_waitqueue_head <-kthread_stop | ||
1309 | - bash-4043 [00] 41.267110: wake_up_process <-kthread_stop | ||
1310 | - bash-4043 [00] 41.267110: try_to_wake_up <-wake_up_process | ||
1311 | - bash-4043 [00] 41.267111: select_task_rq_rt <-try_to_wake_up | ||
1312 | - | ||
1313 | - | ||
1314 | -Note, reading the trace_pipe will block until more input is added. | ||
1315 | -By changing the tracer, trace_pipe will issue an EOF. We needed | ||
1316 | -to set the ftrace tracer _before_ cating the trace_pipe file. | ||
1317 | - | ||
1318 | - | ||
1319 | -trace entries | ||
1320 | -------------- | ||
1321 | - | ||
1322 | -Having too much or not enough data can be troublesome in diagnosing | ||
1323 | -some issue in the kernel. The file trace_entries is used to modify | ||
1324 | -the size of the internal trace buffers. The numbers listed | ||
1325 | -is the number of entries that can be recorded per CPU. To know | ||
1326 | -the full size, multiply the number of possible CPUS with the | ||
1327 | -number of entries. | ||
1328 | - | ||
1329 | - # cat /debug/tracing/trace_entries | ||
1330 | -65620 | ||
1331 | - | ||
1332 | -Note, to modify this you must have tracing fulling disabled. To do that, | ||
1333 | -echo "none" into the current_tracer. | ||
1334 | - | ||
1335 | - # echo none > /debug/tracing/current_tracer | ||
1336 | - # echo 100000 > /debug/tracing/trace_entries | ||
1337 | - # cat /debug/tracing/trace_entries | ||
1338 | -100045 | ||
1339 | - | ||
1340 | - | ||
1341 | -Notice that we echoed in 100,000 but the size is 100,045. The entries | ||
1342 | -are held by individual pages. It allocates the number of pages it takes | ||
1343 | -to fulfill the request. If more entries may fit on the last page | ||
1344 | -it will add them. | ||
1345 | - | ||
1346 | - # echo 1 > /debug/tracing/trace_entries | ||
1347 | - # cat /debug/tracing/trace_entries | ||
1348 | -85 | ||
1349 | - | ||
1350 | -This shows us that 85 entries can fit on a single page. | ||
1351 | - | ||
1352 | -The number of pages that will be allocated is a percentage of available | ||
1353 | -memory. Allocating too much will produces an error. | ||
1354 | - | ||
1355 | - # echo 1000000000000 > /debug/tracing/trace_entries | ||
1356 | --bash: echo: write error: Cannot allocate memory | ||
1357 | - # cat /debug/tracing/trace_entries | ||
1358 | -85 | ||
1359 | - | ||
1360 | diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h | ||
1361 | index 90e5627..fda05e2 100644 | ||
1362 | --- a/arch/powerpc/kernel/ppc32.h | ||
1363 | +++ b/arch/powerpc/kernel/ppc32.h | ||
1364 | @@ -135,4 +135,6 @@ struct ucontext32 { | ||
1365 | struct mcontext32 uc_mcontext; | ||
1366 | }; | ||
1367 | |||
1368 | +extern int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s); | ||
1369 | + | ||
1370 | #endif /* _PPC64_PPC32_H */ | ||
1371 | diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c | ||
1372 | index 4c1de6a..9d30e10 100644 | ||
1373 | --- a/arch/powerpc/kernel/ptrace32.c | ||
1374 | +++ b/arch/powerpc/kernel/ptrace32.c | ||
1375 | @@ -29,12 +29,15 @@ | ||
1376 | #include <linux/security.h> | ||
1377 | #include <linux/signal.h> | ||
1378 | #include <linux/compat.h> | ||
1379 | +#include <linux/elf.h> | ||
1380 | |||
1381 | #include <asm/uaccess.h> | ||
1382 | #include <asm/page.h> | ||
1383 | #include <asm/pgtable.h> | ||
1384 | #include <asm/system.h> | ||
1385 | |||
1386 | +#include "ppc32.h" | ||
1387 | + | ||
1388 | /* | ||
1389 | * does not yet catch signals sent when the child dies. | ||
1390 | * in exit.c or in signal.c. | ||
1391 | @@ -64,6 +67,27 @@ static long compat_ptrace_old(struct task_struct *child, long request, | ||
1392 | return -EPERM; | ||
1393 | } | ||
1394 | |||
1395 | +static int compat_ptrace_getsiginfo(struct task_struct *child, compat_siginfo_t __user *data) | ||
1396 | +{ | ||
1397 | + siginfo_t lastinfo; | ||
1398 | + int error = -ESRCH; | ||
1399 | + | ||
1400 | + read_lock(&tasklist_lock); | ||
1401 | + if (likely(child->sighand != NULL)) { | ||
1402 | + error = -EINVAL; | ||
1403 | + spin_lock_irq(&child->sighand->siglock); | ||
1404 | + if (likely(child->last_siginfo != NULL)) { | ||
1405 | + lastinfo = *child->last_siginfo; | ||
1406 | + error = 0; | ||
1407 | + } | ||
1408 | + spin_unlock_irq(&child->sighand->siglock); | ||
1409 | + } | ||
1410 | + read_unlock(&tasklist_lock); | ||
1411 | + if (!error) | ||
1412 | + return copy_siginfo_to_user32(data, &lastinfo); | ||
1413 | + return error; | ||
1414 | +} | ||
1415 | + | ||
1416 | long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | ||
1417 | compat_ulong_t caddr, compat_ulong_t cdata) | ||
1418 | { | ||
1419 | @@ -282,6 +306,9 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | ||
1420 | 0, PT_REGS_COUNT * sizeof(compat_long_t), | ||
1421 | compat_ptr(data)); | ||
1422 | |||
1423 | + case PTRACE_GETSIGINFO: | ||
1424 | + return compat_ptrace_getsiginfo(child, compat_ptr(data)); | ||
1425 | + | ||
1426 | case PTRACE_GETFPREGS: | ||
1427 | case PTRACE_SETFPREGS: | ||
1428 | case PTRACE_GETVRREGS: | ||
1429 | diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c | ||
1430 | index 5921e5f..1c3a66a 100644 | ||
1431 | --- a/arch/x86/kernel/io_delay.c | ||
1432 | +++ b/arch/x86/kernel/io_delay.c | ||
1433 | @@ -103,6 +103,9 @@ void __init io_delay_init(void) | ||
1434 | |||
1435 | static int __init io_delay_param(char *s) | ||
1436 | { | ||
1437 | + if (!s) | ||
1438 | + return -EINVAL; | ||
1439 | + | ||
1440 | if (!strcmp(s, "0x80")) | ||
1441 | io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; | ||
1442 | else if (!strcmp(s, "0xed")) | ||
1443 | diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c | ||
1444 | index b8c6743..43c019f 100644 | ||
1445 | --- a/arch/x86/kernel/kprobes.c | ||
1446 | +++ b/arch/x86/kernel/kprobes.c | ||
1447 | @@ -860,7 +860,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) | ||
1448 | |||
1449 | resume_execution(cur, regs, kcb); | ||
1450 | regs->flags |= kcb->kprobe_saved_flags; | ||
1451 | - trace_hardirqs_fixup_flags(regs->flags); | ||
1452 | |||
1453 | if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { | ||
1454 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | ||
1455 | diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c | ||
1456 | index ba370dc..58325a6 100644 | ||
1457 | --- a/arch/x86/kernel/process.c | ||
1458 | +++ b/arch/x86/kernel/process.c | ||
1459 | @@ -164,6 +164,9 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | ||
1460 | |||
1461 | static int __init idle_setup(char *str) | ||
1462 | { | ||
1463 | + if (!str) | ||
1464 | + return -EINVAL; | ||
1465 | + | ||
1466 | if (!strcmp(str, "poll")) { | ||
1467 | printk("using polling idle threads.\n"); | ||
1468 | pm_idle = poll_idle; | ||
1469 | diff --git a/block/bsg.c b/block/bsg.c | ||
1470 | index 54d617f..0526471 100644 | ||
1471 | --- a/block/bsg.c | ||
1472 | +++ b/block/bsg.c | ||
1473 | @@ -725,8 +725,13 @@ static int bsg_put_device(struct bsg_device *bd) | ||
1474 | mutex_lock(&bsg_mutex); | ||
1475 | |||
1476 | do_free = atomic_dec_and_test(&bd->ref_count); | ||
1477 | - if (!do_free) | ||
1478 | + if (!do_free) { | ||
1479 | + mutex_unlock(&bsg_mutex); | ||
1480 | goto out; | ||
1481 | + } | ||
1482 | + | ||
1483 | + hlist_del(&bd->dev_list); | ||
1484 | + mutex_unlock(&bsg_mutex); | ||
1485 | |||
1486 | dprintk("%s: tearing down\n", bd->name); | ||
1487 | |||
1488 | @@ -742,10 +747,8 @@ static int bsg_put_device(struct bsg_device *bd) | ||
1489 | */ | ||
1490 | ret = bsg_complete_all_commands(bd); | ||
1491 | |||
1492 | - hlist_del(&bd->dev_list); | ||
1493 | kfree(bd); | ||
1494 | out: | ||
1495 | - mutex_unlock(&bsg_mutex); | ||
1496 | kref_put(&q->bsg_dev.ref, bsg_kref_release_function); | ||
1497 | if (do_free) | ||
1498 | blk_put_queue(q); | ||
1499 | diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h | ||
1500 | index 78eb784..7828ef2 100644 | ||
1501 | --- a/drivers/input/serio/i8042-x86ia64io.h | ||
1502 | +++ b/drivers/input/serio/i8042-x86ia64io.h | ||
1503 | @@ -63,7 +63,7 @@ static inline void i8042_write_command(int val) | ||
1504 | outb(val, I8042_COMMAND_REG); | ||
1505 | } | ||
1506 | |||
1507 | -#if defined(__i386__) || defined(__x86_64__) | ||
1508 | +#ifdef CONFIG_X86 | ||
1509 | |||
1510 | #include <linux/dmi.h> | ||
1511 | |||
1512 | @@ -291,17 +291,36 @@ static struct dmi_system_id __initdata i8042_dmi_nomux_table[] = { | ||
1513 | DMI_MATCH(DMI_PRODUCT_VERSION, "3000 N100"), | ||
1514 | }, | ||
1515 | }, | ||
1516 | + { | ||
1517 | + .ident = "Acer Aspire 1360", | ||
1518 | + .matches = { | ||
1519 | + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), | ||
1520 | + DMI_MATCH(DMI_PRODUCT_NAME, "Aspire 1360"), | ||
1521 | + }, | ||
1522 | + }, | ||
1523 | { } | ||
1524 | }; | ||
1525 | |||
1526 | - | ||
1527 | - | ||
1528 | +#ifdef CONFIG_PNP | ||
1529 | +static struct dmi_system_id __initdata i8042_dmi_nopnp_table[] = { | ||
1530 | + { | ||
1531 | + .ident = "Intel MBO Desktop D845PESV", | ||
1532 | + .matches = { | ||
1533 | + DMI_MATCH(DMI_BOARD_NAME, "D845PESV"), | ||
1534 | + DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"), | ||
1535 | + }, | ||
1536 | + }, | ||
1537 | + { | ||
1538 | + .ident = "Gericom Bellagio", | ||
1539 | + .matches = { | ||
1540 | + DMI_MATCH(DMI_SYS_VENDOR, "Gericom"), | ||
1541 | + DMI_MATCH(DMI_PRODUCT_NAME, "N34AS6"), | ||
1542 | + }, | ||
1543 | + }, | ||
1544 | + { } | ||
1545 | +}; | ||
1546 | #endif | ||
1547 | |||
1548 | -#ifdef CONFIG_X86 | ||
1549 | - | ||
1550 | -#include <linux/dmi.h> | ||
1551 | - | ||
1552 | /* | ||
1553 | * Some Wistron based laptops need us to explicitly enable the 'Dritek | ||
1554 | * keyboard extension' to make their extra keys start generating scancodes. | ||
1555 | @@ -356,7 +375,6 @@ static struct dmi_system_id __initdata i8042_dmi_dritek_table[] = { | ||
1556 | |||
1557 | #endif /* CONFIG_X86 */ | ||
1558 | |||
1559 | - | ||
1560 | #ifdef CONFIG_PNP | ||
1561 | #include <linux/pnp.h> | ||
1562 | |||
1563 | @@ -466,6 +484,11 @@ static int __init i8042_pnp_init(void) | ||
1564 | int pnp_data_busted = 0; | ||
1565 | int err; | ||
1566 | |||
1567 | +#ifdef CONFIG_X86 | ||
1568 | + if (dmi_check_system(i8042_dmi_nopnp_table)) | ||
1569 | + i8042_nopnp = 1; | ||
1570 | +#endif | ||
1571 | + | ||
1572 | if (i8042_nopnp) { | ||
1573 | printk(KERN_INFO "i8042: PNP detection disabled\n"); | ||
1574 | return 0; | ||
1575 | @@ -591,15 +614,13 @@ static int __init i8042_platform_init(void) | ||
1576 | i8042_reset = 1; | ||
1577 | #endif | ||
1578 | |||
1579 | -#if defined(__i386__) || defined(__x86_64__) | ||
1580 | +#ifdef CONFIG_X86 | ||
1581 | if (dmi_check_system(i8042_dmi_noloop_table)) | ||
1582 | i8042_noloop = 1; | ||
1583 | |||
1584 | if (dmi_check_system(i8042_dmi_nomux_table)) | ||
1585 | i8042_nomux = 1; | ||
1586 | -#endif | ||
1587 | |||
1588 | -#ifdef CONFIG_X86 | ||
1589 | if (dmi_check_system(i8042_dmi_dritek_table)) | ||
1590 | i8042_dritek = 1; | ||
1591 | #endif /* CONFIG_X86 */ | ||
1592 | diff --git a/drivers/md/linear.c b/drivers/md/linear.c | ||
1593 | index 1074824..ec921f5 100644 | ||
1594 | --- a/drivers/md/linear.c | ||
1595 | +++ b/drivers/md/linear.c | ||
1596 | @@ -126,7 +126,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) | ||
1597 | int j = rdev->raid_disk; | ||
1598 | dev_info_t *disk = conf->disks + j; | ||
1599 | |||
1600 | - if (j < 0 || j > raid_disks || disk->rdev) { | ||
1601 | + if (j < 0 || j >= raid_disks || disk->rdev) { | ||
1602 | printk("linear: disk numbering problem. Aborting!\n"); | ||
1603 | goto out; | ||
1604 | } | ||
1605 | diff --git a/drivers/md/md.c b/drivers/md/md.c | ||
1606 | index 2580ac1..9664511 100644 | ||
1607 | --- a/drivers/md/md.c | ||
1608 | +++ b/drivers/md/md.c | ||
1609 | @@ -3326,9 +3326,9 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data) | ||
1610 | disk->queue = mddev->queue; | ||
1611 | add_disk(disk); | ||
1612 | mddev->gendisk = disk; | ||
1613 | - mutex_unlock(&disks_mutex); | ||
1614 | error = kobject_init_and_add(&mddev->kobj, &md_ktype, &disk->dev.kobj, | ||
1615 | "%s", "md"); | ||
1616 | + mutex_unlock(&disks_mutex); | ||
1617 | if (error) | ||
1618 | printk(KERN_WARNING "md: cannot register %s/md - name in use\n", | ||
1619 | disk->disk_name); | ||
1620 | diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c | ||
1621 | index e57905c..bc3ea09 100644 | ||
1622 | --- a/drivers/net/wireless/ath5k/base.c | ||
1623 | +++ b/drivers/net/wireless/ath5k/base.c | ||
1624 | @@ -1774,20 +1774,21 @@ ath5k_tasklet_rx(unsigned long data) | ||
1625 | struct ath5k_rx_status rs = {}; | ||
1626 | struct sk_buff *skb; | ||
1627 | struct ath5k_softc *sc = (void *)data; | ||
1628 | - struct ath5k_buf *bf; | ||
1629 | + struct ath5k_buf *bf, *bf_last; | ||
1630 | struct ath5k_desc *ds; | ||
1631 | int ret; | ||
1632 | int hdrlen; | ||
1633 | int pad; | ||
1634 | |||
1635 | spin_lock(&sc->rxbuflock); | ||
1636 | + if (list_empty(&sc->rxbuf)) { | ||
1637 | + ATH5K_WARN(sc, "empty rx buf pool\n"); | ||
1638 | + goto unlock; | ||
1639 | + } | ||
1640 | + bf_last = list_entry(sc->rxbuf.prev, struct ath5k_buf, list); | ||
1641 | do { | ||
1642 | rxs.flag = 0; | ||
1643 | |||
1644 | - if (unlikely(list_empty(&sc->rxbuf))) { | ||
1645 | - ATH5K_WARN(sc, "empty rx buf pool\n"); | ||
1646 | - break; | ||
1647 | - } | ||
1648 | bf = list_first_entry(&sc->rxbuf, struct ath5k_buf, list); | ||
1649 | BUG_ON(bf->skb == NULL); | ||
1650 | skb = bf->skb; | ||
1651 | @@ -1797,8 +1798,24 @@ ath5k_tasklet_rx(unsigned long data) | ||
1652 | pci_dma_sync_single_for_cpu(sc->pdev, sc->desc_daddr, | ||
1653 | sc->desc_len, PCI_DMA_FROMDEVICE); | ||
1654 | |||
1655 | - if (unlikely(ds->ds_link == bf->daddr)) /* this is the end */ | ||
1656 | - break; | ||
1657 | + /* | ||
1658 | + * last buffer must not be freed to ensure proper hardware | ||
1659 | + * function. When the hardware finishes also a packet next to | ||
1660 | + * it, we are sure, it doesn't use it anymore and we can go on. | ||
1661 | + */ | ||
1662 | + if (bf_last == bf) | ||
1663 | + bf->flags |= 1; | ||
1664 | + if (bf->flags) { | ||
1665 | + struct ath5k_buf *bf_next = list_entry(bf->list.next, | ||
1666 | + struct ath5k_buf, list); | ||
1667 | + ret = sc->ah->ah_proc_rx_desc(sc->ah, bf_next->desc, | ||
1668 | + &rs); | ||
1669 | + if (ret) | ||
1670 | + break; | ||
1671 | + bf->flags &= ~1; | ||
1672 | + /* skip the overwritten one (even status is martian) */ | ||
1673 | + goto next; | ||
1674 | + } | ||
1675 | |||
1676 | ret = sc->ah->ah_proc_rx_desc(sc->ah, ds, &rs); | ||
1677 | if (unlikely(ret == -EINPROGRESS)) | ||
1678 | @@ -1921,6 +1938,7 @@ accept: | ||
1679 | next: | ||
1680 | list_move_tail(&bf->list, &sc->rxbuf); | ||
1681 | } while (ath5k_rxbuf_setup(sc, bf) == 0); | ||
1682 | +unlock: | ||
1683 | spin_unlock(&sc->rxbuflock); | ||
1684 | } | ||
1685 | |||
1686 | @@ -2435,6 +2453,9 @@ ath5k_stop_hw(struct ath5k_softc *sc) | ||
1687 | mutex_unlock(&sc->lock); | ||
1688 | |||
1689 | del_timer_sync(&sc->calib_tim); | ||
1690 | + tasklet_kill(&sc->rxtq); | ||
1691 | + tasklet_kill(&sc->txtq); | ||
1692 | + tasklet_kill(&sc->restq); | ||
1693 | |||
1694 | return ret; | ||
1695 | } | ||
1696 | diff --git a/drivers/net/wireless/ath5k/base.h b/drivers/net/wireless/ath5k/base.h | ||
1697 | index 3a97558..4badca7 100644 | ||
1698 | --- a/drivers/net/wireless/ath5k/base.h | ||
1699 | +++ b/drivers/net/wireless/ath5k/base.h | ||
1700 | @@ -55,7 +55,7 @@ | ||
1701 | |||
1702 | struct ath5k_buf { | ||
1703 | struct list_head list; | ||
1704 | - unsigned int flags; /* tx descriptor flags */ | ||
1705 | + unsigned int flags; /* rx descriptor flags */ | ||
1706 | struct ath5k_desc *desc; /* virtual addr of desc */ | ||
1707 | dma_addr_t daddr; /* physical addr of desc */ | ||
1708 | struct sk_buff *skb; /* skbuff for buf */ | ||
1709 | diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c | ||
1710 | index c4b938b..2be2da6 100644 | ||
1711 | --- a/drivers/scsi/ch.c | ||
1712 | +++ b/drivers/scsi/ch.c | ||
1713 | @@ -926,6 +926,7 @@ static int ch_probe(struct device *dev) | ||
1714 | if (init) | ||
1715 | ch_init_elem(ch); | ||
1716 | |||
1717 | + dev_set_drvdata(dev, ch); | ||
1718 | sdev_printk(KERN_INFO, sd, "Attached scsi changer %s\n", ch->name); | ||
1719 | |||
1720 | return 0; | ||
1721 | diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c | ||
1722 | index 67ff202..8dee320 100644 | ||
1723 | --- a/fs/jbd/transaction.c | ||
1724 | +++ b/fs/jbd/transaction.c | ||
1725 | @@ -1648,12 +1648,42 @@ out: | ||
1726 | return; | ||
1727 | } | ||
1728 | |||
1729 | +/* | ||
1730 | + * journal_try_to_free_buffers() could race with journal_commit_transaction() | ||
1731 | + * The latter might still hold the a count on buffers when inspecting | ||
1732 | + * them on t_syncdata_list or t_locked_list. | ||
1733 | + * | ||
1734 | + * journal_try_to_free_buffers() will call this function to | ||
1735 | + * wait for the current transaction to finish syncing data buffers, before | ||
1736 | + * tryinf to free that buffer. | ||
1737 | + * | ||
1738 | + * Called with journal->j_state_lock held. | ||
1739 | + */ | ||
1740 | +static void journal_wait_for_transaction_sync_data(journal_t *journal) | ||
1741 | +{ | ||
1742 | + transaction_t *transaction = NULL; | ||
1743 | + tid_t tid; | ||
1744 | + | ||
1745 | + spin_lock(&journal->j_state_lock); | ||
1746 | + transaction = journal->j_committing_transaction; | ||
1747 | + | ||
1748 | + if (!transaction) { | ||
1749 | + spin_unlock(&journal->j_state_lock); | ||
1750 | + return; | ||
1751 | + } | ||
1752 | + | ||
1753 | + tid = transaction->t_tid; | ||
1754 | + spin_unlock(&journal->j_state_lock); | ||
1755 | + log_wait_commit(journal, tid); | ||
1756 | +} | ||
1757 | |||
1758 | /** | ||
1759 | * int journal_try_to_free_buffers() - try to free page buffers. | ||
1760 | * @journal: journal for operation | ||
1761 | * @page: to try and free | ||
1762 | - * @unused_gfp_mask: unused | ||
1763 | + * @gfp_mask: we use the mask to detect how hard should we try to release | ||
1764 | + * buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to | ||
1765 | + * release the buffers. | ||
1766 | * | ||
1767 | * | ||
1768 | * For all the buffers on this page, | ||
1769 | @@ -1682,9 +1712,11 @@ out: | ||
1770 | * journal_try_to_free_buffer() is changing its state. But that | ||
1771 | * cannot happen because we never reallocate freed data as metadata | ||
1772 | * while the data is part of a transaction. Yes? | ||
1773 | + * | ||
1774 | + * Return 0 on failure, 1 on success | ||
1775 | */ | ||
1776 | int journal_try_to_free_buffers(journal_t *journal, | ||
1777 | - struct page *page, gfp_t unused_gfp_mask) | ||
1778 | + struct page *page, gfp_t gfp_mask) | ||
1779 | { | ||
1780 | struct buffer_head *head; | ||
1781 | struct buffer_head *bh; | ||
1782 | @@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal, | ||
1783 | if (buffer_jbd(bh)) | ||
1784 | goto busy; | ||
1785 | } while ((bh = bh->b_this_page) != head); | ||
1786 | + | ||
1787 | ret = try_to_free_buffers(page); | ||
1788 | + | ||
1789 | + /* | ||
1790 | + * There are a number of places where journal_try_to_free_buffers() | ||
1791 | + * could race with journal_commit_transaction(), the later still | ||
1792 | + * holds the reference to the buffers to free while processing them. | ||
1793 | + * try_to_free_buffers() failed to free those buffers. Some of the | ||
1794 | + * caller of releasepage() request page buffers to be dropped, otherwise | ||
1795 | + * treat the fail-to-free as errors (such as generic_file_direct_IO()) | ||
1796 | + * | ||
1797 | + * So, if the caller of try_to_release_page() wants the synchronous | ||
1798 | + * behaviour(i.e make sure buffers are dropped upon return), | ||
1799 | + * let's wait for the current transaction to finish flush of | ||
1800 | + * dirty data buffers, then try to free those buffers again, | ||
1801 | + * with the journal locked. | ||
1802 | + */ | ||
1803 | + if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) { | ||
1804 | + journal_wait_for_transaction_sync_data(journal); | ||
1805 | + ret = try_to_free_buffers(page); | ||
1806 | + } | ||
1807 | + | ||
1808 | busy: | ||
1809 | return ret; | ||
1810 | } | ||
1811 | diff --git a/fs/namei.c b/fs/namei.c | ||
1812 | index 01e67dd..3b26a24 100644 | ||
1813 | --- a/fs/namei.c | ||
1814 | +++ b/fs/namei.c | ||
1815 | @@ -519,7 +519,14 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s | ||
1816 | */ | ||
1817 | result = d_lookup(parent, name); | ||
1818 | if (!result) { | ||
1819 | - struct dentry * dentry = d_alloc(parent, name); | ||
1820 | + struct dentry *dentry; | ||
1821 | + | ||
1822 | + /* Don't create child dentry for a dead directory. */ | ||
1823 | + result = ERR_PTR(-ENOENT); | ||
1824 | + if (IS_DEADDIR(dir)) | ||
1825 | + goto out_unlock; | ||
1826 | + | ||
1827 | + dentry = d_alloc(parent, name); | ||
1828 | result = ERR_PTR(-ENOMEM); | ||
1829 | if (dentry) { | ||
1830 | result = dir->i_op->lookup(dir, dentry, nd); | ||
1831 | @@ -528,6 +535,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s | ||
1832 | else | ||
1833 | result = dentry; | ||
1834 | } | ||
1835 | +out_unlock: | ||
1836 | mutex_unlock(&dir->i_mutex); | ||
1837 | return result; | ||
1838 | } | ||
1839 | @@ -1317,7 +1325,14 @@ static struct dentry *__lookup_hash(struct qstr *name, | ||
1840 | |||
1841 | dentry = cached_lookup(base, name, nd); | ||
1842 | if (!dentry) { | ||
1843 | - struct dentry *new = d_alloc(base, name); | ||
1844 | + struct dentry *new; | ||
1845 | + | ||
1846 | + /* Don't create child dentry for a dead directory. */ | ||
1847 | + dentry = ERR_PTR(-ENOENT); | ||
1848 | + if (IS_DEADDIR(inode)) | ||
1849 | + goto out; | ||
1850 | + | ||
1851 | + new = d_alloc(base, name); | ||
1852 | dentry = ERR_PTR(-ENOMEM); | ||
1853 | if (!new) | ||
1854 | goto out; | ||
1855 | diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c | ||
1856 | index 596c5d8..1d7ac64 100644 | ||
1857 | --- a/fs/nfs/inode.c | ||
1858 | +++ b/fs/nfs/inode.c | ||
1859 | @@ -57,8 +57,6 @@ static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; | ||
1860 | static void nfs_invalidate_inode(struct inode *); | ||
1861 | static int nfs_update_inode(struct inode *, struct nfs_fattr *); | ||
1862 | |||
1863 | -static void nfs_zap_acl_cache(struct inode *); | ||
1864 | - | ||
1865 | static struct kmem_cache * nfs_inode_cachep; | ||
1866 | |||
1867 | static inline unsigned long | ||
1868 | @@ -167,7 +165,7 @@ void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) | ||
1869 | } | ||
1870 | } | ||
1871 | |||
1872 | -static void nfs_zap_acl_cache(struct inode *inode) | ||
1873 | +void nfs_zap_acl_cache(struct inode *inode) | ||
1874 | { | ||
1875 | void (*clear_acl_cache)(struct inode *); | ||
1876 | |||
1877 | diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h | ||
1878 | index 04ae867..24241fc 100644 | ||
1879 | --- a/fs/nfs/internal.h | ||
1880 | +++ b/fs/nfs/internal.h | ||
1881 | @@ -150,6 +150,7 @@ extern void nfs_clear_inode(struct inode *); | ||
1882 | #ifdef CONFIG_NFS_V4 | ||
1883 | extern void nfs4_clear_inode(struct inode *); | ||
1884 | #endif | ||
1885 | +void nfs_zap_acl_cache(struct inode *inode); | ||
1886 | |||
1887 | /* super.c */ | ||
1888 | extern struct file_system_type nfs_xdev_fs_type; | ||
1889 | diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c | ||
1890 | index 9b73625..423842f 100644 | ||
1891 | --- a/fs/nfs/nfs3acl.c | ||
1892 | +++ b/fs/nfs/nfs3acl.c | ||
1893 | @@ -5,6 +5,8 @@ | ||
1894 | #include <linux/posix_acl_xattr.h> | ||
1895 | #include <linux/nfsacl.h> | ||
1896 | |||
1897 | +#include "internal.h" | ||
1898 | + | ||
1899 | #define NFSDBG_FACILITY NFSDBG_PROC | ||
1900 | |||
1901 | ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) | ||
1902 | @@ -205,6 +207,8 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) | ||
1903 | status = nfs_revalidate_inode(server, inode); | ||
1904 | if (status < 0) | ||
1905 | return ERR_PTR(status); | ||
1906 | + if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) | ||
1907 | + nfs_zap_acl_cache(inode); | ||
1908 | acl = nfs3_get_cached_acl(inode, type); | ||
1909 | if (acl != ERR_PTR(-EAGAIN)) | ||
1910 | return acl; | ||
1911 | @@ -319,9 +323,8 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | ||
1912 | dprintk("NFS call setacl\n"); | ||
1913 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; | ||
1914 | status = rpc_call_sync(server->client_acl, &msg, 0); | ||
1915 | - spin_lock(&inode->i_lock); | ||
1916 | - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; | ||
1917 | - spin_unlock(&inode->i_lock); | ||
1918 | + nfs_access_zap_cache(inode); | ||
1919 | + nfs_zap_acl_cache(inode); | ||
1920 | dprintk("NFS reply setacl: %d\n", status); | ||
1921 | |||
1922 | /* pages may have been allocated at the xdr layer. */ | ||
1923 | diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c | ||
1924 | index 1293e0a..806d17f 100644 | ||
1925 | --- a/fs/nfs/nfs4proc.c | ||
1926 | +++ b/fs/nfs/nfs4proc.c | ||
1927 | @@ -2706,6 +2706,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) | ||
1928 | ret = nfs_revalidate_inode(server, inode); | ||
1929 | if (ret < 0) | ||
1930 | return ret; | ||
1931 | + if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL) | ||
1932 | + nfs_zap_acl_cache(inode); | ||
1933 | ret = nfs4_read_cached_acl(inode, buf, buflen); | ||
1934 | if (ret != -ENOENT) | ||
1935 | return ret; | ||
1936 | @@ -2733,7 +2735,8 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl | ||
1937 | nfs_inode_return_delegation(inode); | ||
1938 | buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); | ||
1939 | ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); | ||
1940 | - nfs_zap_caches(inode); | ||
1941 | + nfs_access_zap_cache(inode); | ||
1942 | + nfs_zap_acl_cache(inode); | ||
1943 | return ret; | ||
1944 | } | ||
1945 | |||
1946 | diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c | ||
1947 | index 3f13d49..35e5c6e 100644 | ||
1948 | --- a/fs/romfs/inode.c | ||
1949 | +++ b/fs/romfs/inode.c | ||
1950 | @@ -418,7 +418,8 @@ static int | ||
1951 | romfs_readpage(struct file *file, struct page * page) | ||
1952 | { | ||
1953 | struct inode *inode = page->mapping->host; | ||
1954 | - loff_t offset, avail, readlen; | ||
1955 | + loff_t offset, size; | ||
1956 | + unsigned long filled; | ||
1957 | void *buf; | ||
1958 | int result = -EIO; | ||
1959 | |||
1960 | @@ -430,21 +431,29 @@ romfs_readpage(struct file *file, struct page * page) | ||
1961 | |||
1962 | /* 32 bit warning -- but not for us :) */ | ||
1963 | offset = page_offset(page); | ||
1964 | - if (offset < i_size_read(inode)) { | ||
1965 | - avail = inode->i_size-offset; | ||
1966 | - readlen = min_t(unsigned long, avail, PAGE_SIZE); | ||
1967 | - if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) { | ||
1968 | - if (readlen < PAGE_SIZE) { | ||
1969 | - memset(buf + readlen,0,PAGE_SIZE-readlen); | ||
1970 | - } | ||
1971 | - SetPageUptodate(page); | ||
1972 | - result = 0; | ||
1973 | + size = i_size_read(inode); | ||
1974 | + filled = 0; | ||
1975 | + result = 0; | ||
1976 | + if (offset < size) { | ||
1977 | + unsigned long readlen; | ||
1978 | + | ||
1979 | + size -= offset; | ||
1980 | + readlen = size > PAGE_SIZE ? PAGE_SIZE : size; | ||
1981 | + | ||
1982 | + filled = romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen); | ||
1983 | + | ||
1984 | + if (filled != readlen) { | ||
1985 | + SetPageError(page); | ||
1986 | + filled = 0; | ||
1987 | + result = -EIO; | ||
1988 | } | ||
1989 | } | ||
1990 | - if (result) { | ||
1991 | - memset(buf, 0, PAGE_SIZE); | ||
1992 | - SetPageError(page); | ||
1993 | - } | ||
1994 | + | ||
1995 | + if (filled < PAGE_SIZE) | ||
1996 | + memset(buf + filled, 0, PAGE_SIZE-filled); | ||
1997 | + | ||
1998 | + if (!result) | ||
1999 | + SetPageUptodate(page); | ||
2000 | flush_dcache_page(page); | ||
2001 | |||
2002 | unlock_page(page); | ||
2003 | diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h | ||
2004 | index 7b7b9b1..10ee28e 100644 | ||
2005 | --- a/include/sound/emu10k1.h | ||
2006 | +++ b/include/sound/emu10k1.h | ||
2007 | @@ -1670,6 +1670,7 @@ struct snd_emu_chip_details { | ||
2008 | unsigned char spi_dac; /* SPI interface for DAC */ | ||
2009 | unsigned char i2c_adc; /* I2C interface for ADC */ | ||
2010 | unsigned char adc_1361t; /* Use Philips 1361T ADC */ | ||
2011 | + unsigned char invert_shared_spdif; /* analog/digital switch inverted */ | ||
2012 | const char *driver; | ||
2013 | const char *name; | ||
2014 | const char *id; /* for backward compatibility - can be NULL if not needed */ | ||
2015 | diff --git a/mm/filemap.c b/mm/filemap.c | ||
2016 | index 4f32423..afb991a 100644 | ||
2017 | --- a/mm/filemap.c | ||
2018 | +++ b/mm/filemap.c | ||
2019 | @@ -2581,9 +2581,8 @@ out: | ||
2020 | * Otherwise return zero. | ||
2021 | * | ||
2022 | * The @gfp_mask argument specifies whether I/O may be performed to release | ||
2023 | - * this page (__GFP_IO), and whether the call may block (__GFP_WAIT). | ||
2024 | + * this page (__GFP_IO), and whether the call may block (__GFP_WAIT & __GFP_FS). | ||
2025 | * | ||
2026 | - * NOTE: @gfp_mask may go away, and this function may become non-blocking. | ||
2027 | */ | ||
2028 | int try_to_release_page(struct page *page, gfp_t gfp_mask) | ||
2029 | { | ||
2030 | diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c | ||
2031 | index f85d946..24e91eb 100644 | ||
2032 | --- a/net/bluetooth/bnep/core.c | ||
2033 | +++ b/net/bluetooth/bnep/core.c | ||
2034 | @@ -507,6 +507,11 @@ static int bnep_session(void *arg) | ||
2035 | /* Delete network device */ | ||
2036 | unregister_netdev(dev); | ||
2037 | |||
2038 | + /* Wakeup user-space polling for socket errors */ | ||
2039 | + s->sock->sk->sk_err = EUNATCH; | ||
2040 | + | ||
2041 | + wake_up_interruptible(s->sock->sk->sk_sleep); | ||
2042 | + | ||
2043 | /* Release the socket */ | ||
2044 | fput(s->sock->file); | ||
2045 | |||
2046 | diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c | ||
2047 | index 519cdb9..96434d7 100644 | ||
2048 | --- a/net/bluetooth/hidp/core.c | ||
2049 | +++ b/net/bluetooth/hidp/core.c | ||
2050 | @@ -581,6 +581,12 @@ static int hidp_session(void *arg) | ||
2051 | hid_free_device(session->hid); | ||
2052 | } | ||
2053 | |||
2054 | + /* Wakeup user-space polling for socket errors */ | ||
2055 | + session->intr_sock->sk->sk_err = EUNATCH; | ||
2056 | + session->ctrl_sock->sk->sk_err = EUNATCH; | ||
2057 | + | ||
2058 | + hidp_schedule(session); | ||
2059 | + | ||
2060 | fput(session->intr_sock->file); | ||
2061 | |||
2062 | wait_event_timeout(*(ctrl_sk->sk_sleep), | ||
2063 | @@ -879,6 +885,10 @@ int hidp_del_connection(struct hidp_conndel_req *req) | ||
2064 | skb_queue_purge(&session->ctrl_transmit); | ||
2065 | skb_queue_purge(&session->intr_transmit); | ||
2066 | |||
2067 | + /* Wakeup user-space polling for socket errors */ | ||
2068 | + session->intr_sock->sk->sk_err = EUNATCH; | ||
2069 | + session->ctrl_sock->sk->sk_err = EUNATCH; | ||
2070 | + | ||
2071 | /* Kill session thread */ | ||
2072 | atomic_inc(&session->terminate); | ||
2073 | hidp_schedule(session); | ||
2074 | diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c | ||
2075 | index 4334d5c..1454432 100644 | ||
2076 | --- a/net/ipv4/netfilter/nf_nat_sip.c | ||
2077 | +++ b/net/ipv4/netfilter/nf_nat_sip.c | ||
2078 | @@ -318,11 +318,11 @@ static int mangle_content_len(struct sk_buff *skb, | ||
2079 | buffer, buflen); | ||
2080 | } | ||
2081 | |||
2082 | -static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr, | ||
2083 | - unsigned int dataoff, unsigned int *datalen, | ||
2084 | - enum sdp_header_types type, | ||
2085 | - enum sdp_header_types term, | ||
2086 | - char *buffer, int buflen) | ||
2087 | +static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr, | ||
2088 | + unsigned int dataoff, unsigned int *datalen, | ||
2089 | + enum sdp_header_types type, | ||
2090 | + enum sdp_header_types term, | ||
2091 | + char *buffer, int buflen) | ||
2092 | { | ||
2093 | enum ip_conntrack_info ctinfo; | ||
2094 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | ||
2095 | @@ -330,9 +330,9 @@ static unsigned mangle_sdp_packet(struct sk_buff *skb, const char **dptr, | ||
2096 | |||
2097 | if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term, | ||
2098 | &matchoff, &matchlen) <= 0) | ||
2099 | - return 0; | ||
2100 | + return -ENOENT; | ||
2101 | return mangle_packet(skb, dptr, datalen, matchoff, matchlen, | ||
2102 | - buffer, buflen); | ||
2103 | + buffer, buflen) ? 0 : -EINVAL; | ||
2104 | } | ||
2105 | |||
2106 | static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, | ||
2107 | @@ -346,8 +346,8 @@ static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr, | ||
2108 | unsigned int buflen; | ||
2109 | |||
2110 | buflen = sprintf(buffer, NIPQUAD_FMT, NIPQUAD(addr->ip)); | ||
2111 | - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, | ||
2112 | - buffer, buflen)) | ||
2113 | + if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term, | ||
2114 | + buffer, buflen)) | ||
2115 | return 0; | ||
2116 | |||
2117 | return mangle_content_len(skb, dptr, datalen); | ||
2118 | @@ -381,15 +381,27 @@ static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr, | ||
2119 | |||
2120 | /* Mangle session description owner and contact addresses */ | ||
2121 | buflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(addr->ip)); | ||
2122 | - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, | ||
2123 | + if (mangle_sdp_packet(skb, dptr, dataoff, datalen, | ||
2124 | SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA, | ||
2125 | buffer, buflen)) | ||
2126 | return 0; | ||
2127 | |||
2128 | - if (!mangle_sdp_packet(skb, dptr, dataoff, datalen, | ||
2129 | - SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, | ||
2130 | - buffer, buflen)) | ||
2131 | + switch (mangle_sdp_packet(skb, dptr, dataoff, datalen, | ||
2132 | + SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA, | ||
2133 | + buffer, buflen)) { | ||
2134 | + case 0: | ||
2135 | + /* | ||
2136 | + * RFC 2327: | ||
2137 | + * | ||
2138 | + * Session description | ||
2139 | + * | ||
2140 | + * c=* (connection information - not required if included in all media) | ||
2141 | + */ | ||
2142 | + case -ENOENT: | ||
2143 | + break; | ||
2144 | + default: | ||
2145 | return 0; | ||
2146 | + } | ||
2147 | |||
2148 | return mangle_content_len(skb, dptr, datalen); | ||
2149 | } | ||
2150 | diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c | ||
2151 | index ed76baa..9f32859 100644 | ||
2152 | --- a/net/netfilter/xt_time.c | ||
2153 | +++ b/net/netfilter/xt_time.c | ||
2154 | @@ -173,7 +173,7 @@ time_mt(const struct sk_buff *skb, const struct net_device *in, | ||
2155 | __net_timestamp((struct sk_buff *)skb); | ||
2156 | |||
2157 | stamp = ktime_to_ns(skb->tstamp); | ||
2158 | - do_div(stamp, NSEC_PER_SEC); | ||
2159 | + stamp = div_s64(stamp, NSEC_PER_SEC); | ||
2160 | |||
2161 | if (info->flags & XT_TIME_LOCAL_TZ) | ||
2162 | /* Adjust for local timezone */ | ||
2163 | diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c | ||
2164 | index 558dadb..e024e45 100644 | ||
2165 | --- a/sound/core/seq/oss/seq_oss_synth.c | ||
2166 | +++ b/sound/core/seq/oss/seq_oss_synth.c | ||
2167 | @@ -604,6 +604,9 @@ snd_seq_oss_synth_make_info(struct seq_oss_devinfo *dp, int dev, struct synth_in | ||
2168 | { | ||
2169 | struct seq_oss_synth *rec; | ||
2170 | |||
2171 | + if (dev < 0 || dev >= dp->max_synthdev) | ||
2172 | + return -ENXIO; | ||
2173 | + | ||
2174 | if (dp->synths[dev].is_midi) { | ||
2175 | struct midi_info minf; | ||
2176 | snd_seq_oss_midi_make_info(dp, dp->synths[dev].midi_mapped, &minf); | ||
2177 | diff --git a/sound/pci/emu10k1/emu10k1_main.c b/sound/pci/emu10k1/emu10k1_main.c | ||
2178 | index 548c9cc..2f283ea 100644 | ||
2179 | --- a/sound/pci/emu10k1/emu10k1_main.c | ||
2180 | +++ b/sound/pci/emu10k1/emu10k1_main.c | ||
2181 | @@ -1528,6 +1528,7 @@ static struct snd_emu_chip_details emu_chip_details[] = { | ||
2182 | .ca0151_chip = 1, | ||
2183 | .spk71 = 1, | ||
2184 | .spdif_bug = 1, | ||
2185 | + .invert_shared_spdif = 1, /* digital/analog switch swapped */ | ||
2186 | .adc_1361t = 1, /* 24 bit capture instead of 16bit. Fixes ALSA bug#324 */ | ||
2187 | .ac97_chip = 1} , | ||
2188 | {.vendor = 0x1102, .device = 0x0004, .revision = 0x04, | ||
2189 | diff --git a/sound/pci/emu10k1/emumixer.c b/sound/pci/emu10k1/emumixer.c | ||
2190 | index fd22120..9f77692 100644 | ||
2191 | --- a/sound/pci/emu10k1/emumixer.c | ||
2192 | +++ b/sound/pci/emu10k1/emumixer.c | ||
2193 | @@ -1578,6 +1578,10 @@ static int snd_emu10k1_shared_spdif_get(struct snd_kcontrol *kcontrol, | ||
2194 | ucontrol->value.integer.value[0] = inl(emu->port + A_IOCFG) & A_IOCFG_GPOUT0 ? 1 : 0; | ||
2195 | else | ||
2196 | ucontrol->value.integer.value[0] = inl(emu->port + HCFG) & HCFG_GPOUT0 ? 1 : 0; | ||
2197 | + if (emu->card_capabilities->invert_shared_spdif) | ||
2198 | + ucontrol->value.integer.value[0] = | ||
2199 | + !ucontrol->value.integer.value[0]; | ||
2200 | + | ||
2201 | return 0; | ||
2202 | } | ||
2203 | |||
2204 | @@ -1586,15 +1590,18 @@ static int snd_emu10k1_shared_spdif_put(struct snd_kcontrol *kcontrol, | ||
2205 | { | ||
2206 | unsigned long flags; | ||
2207 | struct snd_emu10k1 *emu = snd_kcontrol_chip(kcontrol); | ||
2208 | - unsigned int reg, val; | ||
2209 | + unsigned int reg, val, sw; | ||
2210 | int change = 0; | ||
2211 | |||
2212 | + sw = ucontrol->value.integer.value[0]; | ||
2213 | + if (emu->card_capabilities->invert_shared_spdif) | ||
2214 | + sw = !sw; | ||
2215 | spin_lock_irqsave(&emu->reg_lock, flags); | ||
2216 | if ( emu->card_capabilities->i2c_adc) { | ||
2217 | /* Do nothing for Audigy 2 ZS Notebook */ | ||
2218 | } else if (emu->audigy) { | ||
2219 | reg = inl(emu->port + A_IOCFG); | ||
2220 | - val = ucontrol->value.integer.value[0] ? A_IOCFG_GPOUT0 : 0; | ||
2221 | + val = sw ? A_IOCFG_GPOUT0 : 0; | ||
2222 | change = (reg & A_IOCFG_GPOUT0) != val; | ||
2223 | if (change) { | ||
2224 | reg &= ~A_IOCFG_GPOUT0; | ||
2225 | @@ -1603,7 +1610,7 @@ static int snd_emu10k1_shared_spdif_put(struct snd_kcontrol *kcontrol, | ||
2226 | } | ||
2227 | } | ||
2228 | reg = inl(emu->port + HCFG); | ||
2229 | - val = ucontrol->value.integer.value[0] ? HCFG_GPOUT0 : 0; | ||
2230 | + val = sw ? HCFG_GPOUT0 : 0; | ||
2231 | change |= (reg & HCFG_GPOUT0) != val; | ||
2232 | if (change) { | ||
2233 | reg &= ~HCFG_GPOUT0; | ||
2234 | diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c | ||
2235 | index b3a618e..6ba7ac0 100644 | ||
2236 | --- a/sound/pci/hda/hda_intel.c | ||
2237 | +++ b/sound/pci/hda/hda_intel.c | ||
2238 | @@ -285,6 +285,7 @@ struct azx_dev { | ||
2239 | u32 *posbuf; /* position buffer pointer */ | ||
2240 | |||
2241 | unsigned int bufsize; /* size of the play buffer in bytes */ | ||
2242 | + unsigned int period_bytes; /* size of the period in bytes */ | ||
2243 | unsigned int frags; /* number for period in the play buffer */ | ||
2244 | unsigned int fifo_size; /* FIFO size */ | ||
2245 | |||
2246 | @@ -301,11 +302,10 @@ struct azx_dev { | ||
2247 | */ | ||
2248 | unsigned char stream_tag; /* assigned stream */ | ||
2249 | unsigned char index; /* stream index */ | ||
2250 | - /* for sanity check of position buffer */ | ||
2251 | - unsigned int period_intr; | ||
2252 | |||
2253 | unsigned int opened :1; | ||
2254 | unsigned int running :1; | ||
2255 | + unsigned int irq_pending: 1; | ||
2256 | }; | ||
2257 | |||
2258 | /* CORB/RIRB */ | ||
2259 | @@ -369,6 +369,9 @@ struct azx { | ||
2260 | |||
2261 | /* for debugging */ | ||
2262 | unsigned int last_cmd; /* last issued command (to sync) */ | ||
2263 | + | ||
2264 | + /* for pending irqs */ | ||
2265 | + struct work_struct irq_pending_work; | ||
2266 | }; | ||
2267 | |||
2268 | /* driver types */ | ||
2269 | @@ -908,6 +911,8 @@ static void azx_init_pci(struct azx *chip) | ||
2270 | } | ||
2271 | |||
2272 | |||
2273 | +static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev); | ||
2274 | + | ||
2275 | /* | ||
2276 | * interrupt handler | ||
2277 | */ | ||
2278 | @@ -930,11 +935,18 @@ static irqreturn_t azx_interrupt(int irq, void *dev_id) | ||
2279 | azx_dev = &chip->azx_dev[i]; | ||
2280 | if (status & azx_dev->sd_int_sta_mask) { | ||
2281 | azx_sd_writeb(azx_dev, SD_STS, SD_INT_MASK); | ||
2282 | - if (azx_dev->substream && azx_dev->running) { | ||
2283 | - azx_dev->period_intr++; | ||
2284 | + if (!azx_dev->substream || !azx_dev->running) | ||
2285 | + continue; | ||
2286 | + /* check whether this IRQ is really acceptable */ | ||
2287 | + if (azx_position_ok(chip, azx_dev)) { | ||
2288 | + azx_dev->irq_pending = 0; | ||
2289 | spin_unlock(&chip->reg_lock); | ||
2290 | snd_pcm_period_elapsed(azx_dev->substream); | ||
2291 | spin_lock(&chip->reg_lock); | ||
2292 | + } else { | ||
2293 | + /* bogus IRQ, process it later */ | ||
2294 | + azx_dev->irq_pending = 1; | ||
2295 | + schedule_work(&chip->irq_pending_work); | ||
2296 | } | ||
2297 | } | ||
2298 | } | ||
2299 | @@ -973,6 +985,7 @@ static int azx_setup_periods(struct snd_pcm_substream *substream, | ||
2300 | azx_sd_writel(azx_dev, SD_BDLPU, 0); | ||
2301 | |||
2302 | period_bytes = snd_pcm_lib_period_bytes(substream); | ||
2303 | + azx_dev->period_bytes = period_bytes; | ||
2304 | periods = azx_dev->bufsize / period_bytes; | ||
2305 | |||
2306 | /* program the initial BDL entries */ | ||
2307 | @@ -1421,27 +1434,16 @@ static int azx_pcm_trigger(struct snd_pcm_substream *substream, int cmd) | ||
2308 | return 0; | ||
2309 | } | ||
2310 | |||
2311 | -static snd_pcm_uframes_t azx_pcm_pointer(struct snd_pcm_substream *substream) | ||
2312 | +static unsigned int azx_get_position(struct azx *chip, | ||
2313 | + struct azx_dev *azx_dev) | ||
2314 | { | ||
2315 | - struct azx_pcm *apcm = snd_pcm_substream_chip(substream); | ||
2316 | - struct azx *chip = apcm->chip; | ||
2317 | - struct azx_dev *azx_dev = get_azx_dev(substream); | ||
2318 | unsigned int pos; | ||
2319 | |||
2320 | if (chip->position_fix == POS_FIX_POSBUF || | ||
2321 | chip->position_fix == POS_FIX_AUTO) { | ||
2322 | /* use the position buffer */ | ||
2323 | pos = le32_to_cpu(*azx_dev->posbuf); | ||
2324 | - if (chip->position_fix == POS_FIX_AUTO && | ||
2325 | - azx_dev->period_intr == 1 && !pos) { | ||
2326 | - printk(KERN_WARNING | ||
2327 | - "hda-intel: Invalid position buffer, " | ||
2328 | - "using LPIB read method instead.\n"); | ||
2329 | - chip->position_fix = POS_FIX_NONE; | ||
2330 | - goto read_lpib; | ||
2331 | - } | ||
2332 | } else { | ||
2333 | - read_lpib: | ||
2334 | /* read LPIB */ | ||
2335 | pos = azx_sd_readl(azx_dev, SD_LPIB); | ||
2336 | if (chip->position_fix == POS_FIX_FIFO) | ||
2337 | @@ -1449,7 +1451,90 @@ static snd_pcm_uframes_t azx_pcm_pointer(struct snd_pcm_substream *substream) | ||
2338 | } | ||
2339 | if (pos >= azx_dev->bufsize) | ||
2340 | pos = 0; | ||
2341 | - return bytes_to_frames(substream->runtime, pos); | ||
2342 | + return pos; | ||
2343 | +} | ||
2344 | + | ||
2345 | +static snd_pcm_uframes_t azx_pcm_pointer(struct snd_pcm_substream *substream) | ||
2346 | +{ | ||
2347 | + struct azx_pcm *apcm = snd_pcm_substream_chip(substream); | ||
2348 | + struct azx *chip = apcm->chip; | ||
2349 | + struct azx_dev *azx_dev = get_azx_dev(substream); | ||
2350 | + return bytes_to_frames(substream->runtime, | ||
2351 | + azx_get_position(chip, azx_dev)); | ||
2352 | +} | ||
2353 | + | ||
2354 | +/* | ||
2355 | + * Check whether the current DMA position is acceptable for updating | ||
2356 | + * periods. Returns non-zero if it's OK. | ||
2357 | + * | ||
2358 | + * Many HD-audio controllers appear pretty inaccurate about | ||
2359 | + * the update-IRQ timing. The IRQ is issued before actually the | ||
2360 | + * data is processed. So, we need to process it afterwords in a | ||
2361 | + * workqueue. | ||
2362 | + */ | ||
2363 | +static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev) | ||
2364 | +{ | ||
2365 | + unsigned int pos; | ||
2366 | + | ||
2367 | + pos = azx_get_position(chip, azx_dev); | ||
2368 | + if (chip->position_fix == POS_FIX_AUTO) { | ||
2369 | + if (!pos) { | ||
2370 | + printk(KERN_WARNING | ||
2371 | + "hda-intel: Invalid position buffer, " | ||
2372 | + "using LPIB read method instead.\n"); | ||
2373 | + chip->position_fix = POS_FIX_NONE; | ||
2374 | + pos = azx_get_position(chip, azx_dev); | ||
2375 | + } else | ||
2376 | + chip->position_fix = POS_FIX_POSBUF; | ||
2377 | + } | ||
2378 | + | ||
2379 | + if (pos % azx_dev->period_bytes > azx_dev->period_bytes / 2) | ||
2380 | + return 0; /* NG - it's below the period boundary */ | ||
2381 | + return 1; /* OK, it's fine */ | ||
2382 | +} | ||
2383 | + | ||
2384 | +/* | ||
2385 | + * The work for pending PCM period updates. | ||
2386 | + */ | ||
2387 | +static void azx_irq_pending_work(struct work_struct *work) | ||
2388 | +{ | ||
2389 | + struct azx *chip = container_of(work, struct azx, irq_pending_work); | ||
2390 | + int i, pending; | ||
2391 | + | ||
2392 | + for (;;) { | ||
2393 | + pending = 0; | ||
2394 | + spin_lock_irq(&chip->reg_lock); | ||
2395 | + for (i = 0; i < chip->num_streams; i++) { | ||
2396 | + struct azx_dev *azx_dev = &chip->azx_dev[i]; | ||
2397 | + if (!azx_dev->irq_pending || | ||
2398 | + !azx_dev->substream || | ||
2399 | + !azx_dev->running) | ||
2400 | + continue; | ||
2401 | + if (azx_position_ok(chip, azx_dev)) { | ||
2402 | + azx_dev->irq_pending = 0; | ||
2403 | + spin_unlock(&chip->reg_lock); | ||
2404 | + snd_pcm_period_elapsed(azx_dev->substream); | ||
2405 | + spin_lock(&chip->reg_lock); | ||
2406 | + } else | ||
2407 | + pending++; | ||
2408 | + } | ||
2409 | + spin_unlock_irq(&chip->reg_lock); | ||
2410 | + if (!pending) | ||
2411 | + return; | ||
2412 | + cond_resched(); | ||
2413 | + } | ||
2414 | +} | ||
2415 | + | ||
2416 | +/* clear irq_pending flags and assure no on-going workq */ | ||
2417 | +static void azx_clear_irq_pending(struct azx *chip) | ||
2418 | +{ | ||
2419 | + int i; | ||
2420 | + | ||
2421 | + spin_lock_irq(&chip->reg_lock); | ||
2422 | + for (i = 0; i < chip->num_streams; i++) | ||
2423 | + chip->azx_dev[i].irq_pending = 0; | ||
2424 | + spin_unlock_irq(&chip->reg_lock); | ||
2425 | + flush_scheduled_work(); | ||
2426 | } | ||
2427 | |||
2428 | static struct snd_pcm_ops azx_pcm_ops = { | ||
2429 | @@ -1676,6 +1761,7 @@ static int azx_suspend(struct pci_dev *pci, pm_message_t state) | ||
2430 | int i; | ||
2431 | |||
2432 | snd_power_change_state(card, SNDRV_CTL_POWER_D3hot); | ||
2433 | + azx_clear_irq_pending(chip); | ||
2434 | for (i = 0; i < AZX_MAX_PCMS; i++) | ||
2435 | snd_pcm_suspend_all(chip->pcm[i]); | ||
2436 | if (chip->initialized) | ||
2437 | @@ -1732,6 +1818,7 @@ static int azx_free(struct azx *chip) | ||
2438 | int i; | ||
2439 | |||
2440 | if (chip->initialized) { | ||
2441 | + azx_clear_irq_pending(chip); | ||
2442 | for (i = 0; i < chip->num_streams; i++) | ||
2443 | azx_stream_stop(chip, &chip->azx_dev[i]); | ||
2444 | azx_stop_chip(chip); | ||
2445 | @@ -1857,6 +1944,7 @@ static int __devinit azx_create(struct snd_card *card, struct pci_dev *pci, | ||
2446 | chip->irq = -1; | ||
2447 | chip->driver_type = driver_type; | ||
2448 | chip->msi = enable_msi; | ||
2449 | + INIT_WORK(&chip->irq_pending_work, azx_irq_pending_work); | ||
2450 | |||
2451 | chip->position_fix = check_position_fix(chip, position_fix[dev]); | ||
2452 | check_probe_mask(chip, dev); | ||
2453 | diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c | ||
2454 | index a99e86d..b5f655d 100644 | ||
2455 | --- a/sound/pci/hda/patch_analog.c | ||
2456 | +++ b/sound/pci/hda/patch_analog.c | ||
2457 | @@ -1618,6 +1618,7 @@ static const char *ad1981_models[AD1981_MODELS] = { | ||
2458 | |||
2459 | static struct snd_pci_quirk ad1981_cfg_tbl[] = { | ||
2460 | SND_PCI_QUIRK(0x1014, 0x0597, "Lenovo Z60", AD1981_THINKPAD), | ||
2461 | + SND_PCI_QUIRK(0x1014, 0x05b7, "Lenovo Z60m", AD1981_THINKPAD), | ||
2462 | /* All HP models */ | ||
2463 | SND_PCI_QUIRK(0x103c, 0, "HP nx", AD1981_HP), | ||
2464 | SND_PCI_QUIRK(0x1179, 0x0001, "Toshiba U205", AD1981_TOSHIBA), | ||
2465 | @@ -2623,7 +2624,7 @@ static int ad1988_auto_create_extra_out(struct hda_codec *codec, hda_nid_t pin, | ||
2466 | { | ||
2467 | struct ad198x_spec *spec = codec->spec; | ||
2468 | hda_nid_t nid; | ||
2469 | - int idx, err; | ||
2470 | + int i, idx, err; | ||
2471 | char name[32]; | ||
2472 | |||
2473 | if (! pin) | ||
2474 | @@ -2631,16 +2632,26 @@ static int ad1988_auto_create_extra_out(struct hda_codec *codec, hda_nid_t pin, | ||
2475 | |||
2476 | idx = ad1988_pin_idx(pin); | ||
2477 | nid = ad1988_idx_to_dac(codec, idx); | ||
2478 | - /* specify the DAC as the extra output */ | ||
2479 | - if (! spec->multiout.hp_nid) | ||
2480 | - spec->multiout.hp_nid = nid; | ||
2481 | - else | ||
2482 | - spec->multiout.extra_out_nid[0] = nid; | ||
2483 | - /* control HP volume/switch on the output mixer amp */ | ||
2484 | - sprintf(name, "%s Playback Volume", pfx); | ||
2485 | - if ((err = add_control(spec, AD_CTL_WIDGET_VOL, name, | ||
2486 | - HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT))) < 0) | ||
2487 | - return err; | ||
2488 | + /* check whether the corresponding DAC was already taken */ | ||
2489 | + for (i = 0; i < spec->autocfg.line_outs; i++) { | ||
2490 | + hda_nid_t pin = spec->autocfg.line_out_pins[i]; | ||
2491 | + hda_nid_t dac = ad1988_idx_to_dac(codec, ad1988_pin_idx(pin)); | ||
2492 | + if (dac == nid) | ||
2493 | + break; | ||
2494 | + } | ||
2495 | + if (i >= spec->autocfg.line_outs) { | ||
2496 | + /* specify the DAC as the extra output */ | ||
2497 | + if (!spec->multiout.hp_nid) | ||
2498 | + spec->multiout.hp_nid = nid; | ||
2499 | + else | ||
2500 | + spec->multiout.extra_out_nid[0] = nid; | ||
2501 | + /* control HP volume/switch on the output mixer amp */ | ||
2502 | + sprintf(name, "%s Playback Volume", pfx); | ||
2503 | + err = add_control(spec, AD_CTL_WIDGET_VOL, name, | ||
2504 | + HDA_COMPOSE_AMP_VAL(nid, 3, 0, HDA_OUTPUT)); | ||
2505 | + if (err < 0) | ||
2506 | + return err; | ||
2507 | + } | ||
2508 | nid = ad1988_mixer_nids[idx]; | ||
2509 | sprintf(name, "%s Playback Switch", pfx); | ||
2510 | if ((err = add_control(spec, AD_CTL_BIND_MUTE, name, |