Magellan Linux

Annotation of /alx-src/tags/kernel26-2.6.12-alx-r9/mm/msync.c



Revision 630
Wed Mar 4 11:03:09 2009 UTC by niro
File MIME type: text/plain
File size: 5671 byte(s)
Tag kernel26-2.6.12-alx-r9
/*
 *	linux/mm/msync.c
 *
 * Copyright (C) 1994-1999  Linus Torvalds
 */

/*
 * The msync() system call.
 */
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/hugetlb.h>
#include <linux/syscalls.h>

#include <asm/pgtable.h>
#include <asm/tlbflush.h>

/*
 * Called with mm->page_table_lock held to protect against other
 * threads/the swapper from ripping pte's out from under us.
 */

static void sync_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                unisgned long addr, unsigned long end)
{
        pte_t *pte;

        pte = pte_offset_map(pmd, addr);
        do {
                unsigned long pfn;
                struct page *page;

                if (!pte_present(*pte))
                        continue;
                pfn = pte_pfn(*pte);
                if (!pfn_valid(pfn))
                        continue;
                page = pfn_to_page(pfn);
                if (PageReserved(page))
                        continue;

                if (ptep_clear_flush_dirty(vma, addr, pte) ||
                    page_test_and_clear_dirty(page))
                        set_page_dirty(page);
        } while (pte++, addr += PAGE_SIZE, addr != end);
        pte_unmap(pte - 1);
}

static inline void sync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
                unsigned long addr, unsigned long end)
{
        pmd_t *pmd;
        unsigned long next;

        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
                if (pmd_none_or_clear_bad(pmd))
                        continue;
                sync_pte_range(vma, pmd, addr, next);
        } while (pmd++, addr = next, addr != end);
}

static inline void sync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
                unsigned long addr, unsigned long end)
{
        pud_t *pud;
        unsigned long next;

        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
                if (pud_none_or_clear_bad(pud))
                        continue;
                sync_pmd_range(vma, pud, addr, next);
        } while (pud++, addr = next, addr != end);
}

static void sync_page_range(struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
{
        struct mm_struct *mm = vma->vm_mm;
        pgd_t *pgd;
        unsigned long next;

        /* For hugepages we can't go walking the page table normally,
         * but that's ok, hugetlbfs is memory based, so we don't need
         * to do anything more on an msync() */
        if (is_vm_hugetlb_page(vma))
                return;

        BUG_ON(addr >= end);
        pgd = pgd_offset(mm, addr);
        flush_cache_range(vma, addr, end);
        spin_lock(&mm->page_table_lock);
        do {
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
                sync_pud_range(vma, pgd, addr, next);
        } while (pgd++, addr = next, addr != end);
        spin_unlock(&mm->page_table_lock);
}

#ifdef CONFIG_PREEMPT
static inline void filemap_sync(struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
{
        const size_t chunk = 64 * 1024; /* bytes */
        unsigned long next;

        do {
                next = addr + chunk;
                if (next > end || next < addr)
                        next = end;
                sync_page_range(vma, addr, next);
                cond_resched();
        } while (addr = next, addr != end);
}
#else
static inline void filemap_sync(struct vm_area_struct *vma,
                unsigned long addr, unsigned long end)
{
        sync_page_range(vma, addr, end);
}
#endif

/*
 * MS_SYNC syncs the entire file - including mappings.
 *
 * MS_ASYNC does not start I/O (it used to, up to 2.5.67). Instead, it just
 * marks the relevant pages dirty. The application may now run fsync() to
 * write out the dirty pages and wait on the writeout and check the result.
 * Or the application may run fadvise(FADV_DONTNEED) against the fd to start
 * async writeout immediately.
 * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to
 * applications.
 */
static int msync_interval(struct vm_area_struct *vma,
                unsigned long addr, unsigned long end, int flags)
{
        int ret = 0;
        struct file *file = vma->vm_file;

        if ((flags & MS_INVALIDATE) && (vma->vm_flags & VM_LOCKED))
                return -EBUSY;

        if (file && (vma->vm_flags & VM_SHARED)) {
                filemap_sync(vma, addr, end);

                if (flags & MS_SYNC) {
                        struct address_space *mapping = file->f_mapping;
                        int err;

                        ret = filemap_fdatawrite(mapping);
                        if (file->f_op && file->f_op->fsync) {
                                /*
                                 * We don't take i_sem here because mmap_sem
                                 * is already held.
                                 */
                                err = file->f_op->fsync(file, file->f_dentry, 1);
                                if (err && !ret)
                                        ret = err;
                        }
                        err = filemap_fdatawait(mapping);
                        if (!ret)
                                ret = err;
                }
        }
        return ret;
}

asmlinkage long sys_msync(unsigned long start, size_t len, int flags)
{
        unsigned long end;
        struct vm_area_struct *vma;
        int unmapped_error, error = -EINVAL;

        if (flags & MS_SYNC)
                current->flags |= PF_SYNCWRITE;

        down_read(&current->mm->mmap_sem);
        if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC))
                goto out;
        if (start & ~PAGE_MASK)
                goto out;
        if ((flags & MS_ASYNC) && (flags & MS_SYNC))
                goto out;
        error = -ENOMEM;
        len = (len + ~PAGE_MASK) & PAGE_MASK;
        end = start + len;
        if (end < start)
                goto out;
        error = 0;
        if (end == start)
                goto out;
        /*
         * If the interval [start,end) covers some unmapped address ranges,
         * just ignore them, but return -ENOMEM at the end.
         */
        vma = find_vma(current->mm, start);
        unmapped_error = 0;
        for (;;) {
                /* Still start < end. */
                error = -ENOMEM;
                if (!vma)
                        goto out;
                /* Here start < vma->vm_end. */
                if (start < vma->vm_start) {
                        unmapped_error = -ENOMEM;
                        start = vma->vm_start;
                }
                /* Here vma->vm_start <= start < vma->vm_end. */
                if (end <= vma->vm_end) {
                        if (start < end) {
                                error = msync_interval(vma, start, end, flags);
                                if (error)
                                        goto out;
                        }
                        error = unmapped_error;
                        goto out;
                }
                /* Here vma->vm_start <= start < vma->vm_end < end. */
                error = msync_interval(vma, start, vma->vm_end, flags);
                if (error)
                        goto out;
                start = vma->vm_end;
                vma = vma->vm_next;
        }
out:
        up_read(&current->mm->mmap_sem);
        current->flags &= ~PF_SYNCWRITE;
        return error;
}
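
A minimal user-space sketch of the pattern described in the comment above msync_interval(): MS_ASYNC only marks the mapped pages dirty, so an application that needs the data on disk can follow it with fsync() on the file descriptor, or use MS_SYNC, which writes out and waits in one call. The file path and size below are illustrative and not taken from the kernel source.

/* msync-demo.c - illustrative only, not part of mm/msync.c */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        const size_t len = 4096;                /* one page, for illustration */
        int fd = open("/tmp/example.dat", O_RDWR | O_CREAT, 0644);

        if (fd < 0 || ftruncate(fd, len) < 0) {
                perror("open/ftruncate");
                return EXIT_FAILURE;
        }

        char *map = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (map == MAP_FAILED) {
                perror("mmap");
                return EXIT_FAILURE;
        }

        memcpy(map, "hello", 5);                /* dirty the shared mapping */

        /* MS_ASYNC: just mark the pages dirty, no I/O is started... */
        if (msync(map, len, MS_ASYNC) < 0)
                perror("msync(MS_ASYNC)");

        /* ...so force the write-out explicitly and wait for it to finish. */
        if (fsync(fd) < 0)
                perror("fsync");

        /* Alternatively, MS_SYNC writes out and waits in a single call. */
        if (msync(map, len, MS_SYNC) < 0)
                perror("msync(MS_SYNC)");

        munmap(map, len);
        close(fd);
        return EXIT_SUCCESS;
}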