1 /* mmap.cc
2
3 Copyright 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
4 2008, 2009, 2010, 2011, 2012, 2013 Red Hat, Inc.
5
6 This file is part of Cygwin.
7
8 This software is a copyrighted work licensed under the terms of the
9 Cygwin license. Please consult the file "CYGWIN_LICENSE" for
10 details. */
11
12 #include "winsup.h"
13 #include "miscfuncs.h"
14 #include <unistd.h>
15 #include <stdlib.h>
16 #include <sys/mman.h>
17 #include <sys/param.h>
18 #include "cygerrno.h"
19 #include "security.h"
20 #include "path.h"
21 #include "fhandler.h"
22 #include "dtable.h"
23 #include "cygheap.h"
24 #include "ntdll.h"
25 #include <sys/queue.h>
26
27 /* __PROT_ATTACH indicates an anonymous mapping which is supposed to be
28 attached to a file mapping for pages beyond the file's EOF. The idea
29 is to support mappings longer than the file, without the file growing
30 to mapping length (POSIX semantics). */
31 #define __PROT_ATTACH 0x8000000
32 /* Filler pages are the pages from the last file backed page to the next
33 64K boundary. These pages are created as anonymous pages, but with
34 the same page protection as the file's pages, since POSIX applications
35 expect to be able to access this part the same way as the file pages. */
36 #define __PROT_FILLER 0x4000000
37
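/* Illustrative example (editor's sketch, assuming 4K pages and 64K
   allocation granularity): on 32 bit, mmap'ing a 5 byte file with a
   requested length of 128K yields one file-backed 4K page, 60K of
   __PROT_FILLER pages filling up to the next 64K boundary, and a final
   64K of __PROT_ATTACH pages which are merely reserved and raise SIGBUS
   when accessed.  See the orig_len handling in mmap64 below. */
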
38 /* Stick with 4K pages for bookkeeping, otherwise we just get confused
39 when trying to do file mappings with trailing filler pages correctly. */
40 #define PAGE_CNT(bytes) howmany((bytes), wincap.page_size())
41
42 #define PGBITS (sizeof (DWORD)*8)
43 #define MAPSIZE(pages) howmany ((pages), PGBITS)
44
45 #define MAP_SET(n) (page_map[(n)/PGBITS] |= (1L << ((n) % PGBITS)))
46 #define MAP_CLR(n) (page_map[(n)/PGBITS] &= ~(1L << ((n) % PGBITS)))
47 #define MAP_ISSET(n) (page_map[(n)/PGBITS] & (1L << ((n) % PGBITS)))
48
49 /* Used for anonymous mappings. */
50 static fhandler_dev_zero fh_anonymous;
51
52 /* Used for thread synchronization while accessing mmap bookkeeping lists. */
53 static NO_COPY muto mmap_guard;
54 #define LIST_LOCK() (mmap_guard.init ("mmap_guard")->acquire ())
55 #define LIST_UNLOCK() (mmap_guard.release ())
56
57 /* Small helpers to avoid having lots of flag bit tests in the code. */
58 static inline bool
59 priv (int flags)
60 {
61 return (flags & MAP_PRIVATE) == MAP_PRIVATE;
62 }
63
64 static inline bool
65 fixed (int flags)
66 {
67 return (flags & MAP_FIXED) == MAP_FIXED;
68 }
69
70 static inline bool
71 anonymous (int flags)
72 {
73 return (flags & MAP_ANONYMOUS) == MAP_ANONYMOUS;
74 }
75
76 static inline bool
77 noreserve (int flags)
78 {
79 return (flags & MAP_NORESERVE) == MAP_NORESERVE;
80 }
81
82 static inline bool
83 autogrow (int flags)
84 {
85 return (flags & MAP_AUTOGROW) == MAP_AUTOGROW;
86 }
87
88 static inline bool
89 attached (int prot)
90 {
91 return (prot & __PROT_ATTACH) == __PROT_ATTACH;
92 }
93
94 static inline bool
95 filler (int prot)
96 {
97 return (prot & __PROT_FILLER) == __PROT_FILLER;
98 }
99
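/* Generate the Windows protection value used when creating a mapping
   (section or view) from the file's open access flags and the mmap
   flags.  Shifting left by 4 turns PAGE_READONLY, PAGE_READWRITE and
   PAGE_WRITECOPY into their PAGE_EXECUTE_* counterparts. */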
100 static inline DWORD
101 gen_create_protect (DWORD openflags, int flags)
102 {
103 DWORD ret = PAGE_READONLY;
104
105 if (priv (flags))
106 ret = PAGE_WRITECOPY;
107 else if (openflags & GENERIC_WRITE)
108 ret = PAGE_READWRITE;
109
110 if (openflags & GENERIC_EXECUTE)
111 ret <<= 4;
112
113 return ret;
114 }
115
116 /* Generate Windows protection flags from mmap prot and flag values. */
117 static inline DWORD
118 gen_protect (int prot, int flags)
119 {
120 DWORD ret = PAGE_NOACCESS;
121
122 /* Attached pages are only reserved, but the protection must be a
123 valid value, so we just return PAGE_READWRITE. */
124 if (attached (prot))
125 return PAGE_EXECUTE_READWRITE;
126
127 if (prot & PROT_WRITE)
128 ret = (priv (flags) && (!anonymous (flags) || filler (prot)))
129 ? PAGE_WRITECOPY : PAGE_READWRITE;
130 else if (prot & PROT_READ)
131 ret = PAGE_READONLY;
132
133 if (prot & PROT_EXEC)
134 ret <<= 4;
135
136 return ret;
137 }
138
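/* Create the NT section object backing a mapping: a pagefile-backed
   section for anonymous maps (fhdl == INVALID_HANDLE_VALUE), otherwise
   a section backed by the given file handle.  For MAP_AUTOGROW the
   section is first created with PAGE_READWRITE so the file can grow,
   then recreated with the requested protection.  Returns NULL on
   failure with the Win32 last error set accordingly. */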
139 static HANDLE
140 CreateMapping (HANDLE fhdl, size_t len, off_t off, DWORD openflags,
141 int prot, int flags)
142 {
143 HANDLE h;
144 NTSTATUS status;
145
146 LARGE_INTEGER sectionsize = { QuadPart: (LONGLONG) len };
147 ULONG protect = gen_create_protect (openflags, flags);
148 ULONG attributes = attached (prot) ? SEC_RESERVE : SEC_COMMIT;
149
150 OBJECT_ATTRIBUTES oa;
151 InitializeObjectAttributes (&oa, NULL, OBJ_INHERIT, NULL,
152 sec_none.lpSecurityDescriptor);
153
154 if (fhdl == INVALID_HANDLE_VALUE)
155 {
156 /* Standard anonymous mapping needs non-zero len. */
157 status = NtCreateSection (&h, SECTION_ALL_ACCESS, &oa, &sectionsize,
158 protect, attributes, NULL);
159 }
160 else if (autogrow (flags))
161 {
162 /* Auto-grow only works if the protection is PAGE_READWRITE. So,
163 first we call NtCreateSection with PAGE_READWRITE, then, if the
164 requested protection is different, we close the mapping and
165 reopen it with the correct protection, if auto-grow worked. */
166 sectionsize.QuadPart += off;
167 status = NtCreateSection (&h, SECTION_ALL_ACCESS, &oa, &sectionsize,
168 PAGE_READWRITE, attributes, fhdl);
169 if (NT_SUCCESS (status) && protect != PAGE_READWRITE)
170 {
171 NtClose (h);
172 status = NtCreateSection (&h, SECTION_ALL_ACCESS, &oa, &sectionsize,
173 protect, attributes, fhdl);
174 }
175 }
176 else
177 {
178 /* Zero len creates mapping for whole file and allows
179 AT_EXTENDABLE_FILE mapping, if we ever use it... */
180 sectionsize.QuadPart = 0;
181 status = NtCreateSection (&h, SECTION_ALL_ACCESS, &oa, &sectionsize,
182 protect, attributes, fhdl);
183 }
184 if (!NT_SUCCESS (status))
185 {
186 h = NULL;
187 SetLastError (RtlNtStatusToDosError (status));
188 }
189 return h;
190 }
191
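/* Map a view of the given section into the current process, preferably
   at the given address.  If that fails and the caller didn't request
   MAP_FIXED, retry with an OS-chosen address.  Returns the base address
   of the view, or NULL on failure with the Win32 last error set. */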
192 static void *
193 MapView (HANDLE h, void *addr, size_t len, DWORD openflags,
194 int prot, int flags, off_t off)
195 {
196 NTSTATUS status;
197 LARGE_INTEGER offset = { QuadPart:off };
198 DWORD protect = gen_create_protect (openflags, flags);
199 void *base = addr;
200 SIZE_T commitsize = attached (prot) ? 0 : len;
201 SIZE_T viewsize = len;
202 #ifdef __x86_64__ /* AT_ROUND_TO_PAGE isn't supported on 64 bit systems. */
203 ULONG alloc_type = MEM_TOP_DOWN;
204 #else
205 ULONG alloc_type = (base && !wincap.is_wow64 () ? AT_ROUND_TO_PAGE : 0)
206 | MEM_TOP_DOWN;
207 #endif
208
209 /* Try mapping using the given address first, even if it's NULL.
210 If it failed, and addr was not NULL and flags is not MAP_FIXED,
211 try again with NULL address.
212
213 Note: Retrying the mapping might be unnecessary, now that mmap64 checks
214 for a valid memory area first. */
215 status = NtMapViewOfSection (h, NtCurrentProcess (), &base, 0, commitsize,
216 &offset, &viewsize, ViewShare, alloc_type,
217 protect);
218 if (!NT_SUCCESS (status) && addr && !fixed (flags))
219 {
220 base = NULL;
221 status = NtMapViewOfSection (h, NtCurrentProcess (), &base, 0, commitsize,
222 &offset, &viewsize, ViewShare, 0, protect);
223 }
224 if (!NT_SUCCESS (status))
225 {
226 base = NULL;
227 SetLastError (RtlNtStatusToDosError (status));
228 }
229 debug_printf ("%p (status %p) = NtMapViewOfSection (h:%p, addr:%p, len:%lu,"
230 " off:%Y, protect:%y, type:%y)",
231 base, status, h, addr, len, off, protect, 0);
232 return base;
233 }
234
235 /* Class structure used to keep a record of all current mmap areas
236 in a process. Needed for bookkeeping all mmaps in a process and
237 for duplicating all mmaps after fork() since mmaps are not propagated
238 to child processes by Windows. All information must be duplicated
239 by hand, see fixup_mmaps_after_fork().
240
241 The class structure:
242
243 One member of class map per process, global variable mmapped_areas.
244 Contains a singly-linked list of type class mmap_list. Each mmap_list
245 entry represents all mappings of one file, keyed by file descriptor and
246 file name hash.
247 Each list entry contains a singly-linked list of type class mmap_record.
248 Each mmap_record represents exactly one mapping. For each mapping, there's
249 an additional so-called `page_map'. It's an array of bits, one bit
250 per mapped memory page. The bit is set if the page is accessible,
251 unset otherwise. */
252
253 #pragma pack(push, 4)
254 class mmap_record
255 {
256 public:
257 LIST_ENTRY (mmap_record) mr_next;
258
259 private:
260 /* 4 byte on 32 bit, 8 byte on 64 bit */
261 HANDLE mapping_hdl;
262 SIZE_T len;
263 caddr_t base_address;
264 /* Always 8 bytes */
265 off_t offset;
266 /* Always 4 bytes */
267 int fd;
268 DWORD openflags;
269 int prot;
270 int flags;
271 dev_t dev;
272 DWORD page_map[0];
273
274 public:
275 mmap_record (int nfd, HANDLE h, DWORD of, int p, int f, off_t o, DWORD l,
276 caddr_t b) :
277 mapping_hdl (h),
278 len (l),
279 base_address (b),
280 offset (o),
281 fd (nfd),
282 openflags (of),
283 prot (p),
284 flags (f)
285 {
286 dev = 0;
287 if (fd >= 0 && !cygheap->fdtab.not_open (fd))
288 dev = cygheap->fdtab[fd]->dev ();
289 else if (fd == -1)
290 dev = FH_ZERO;
291 }
292
293 int get_fd () const { return fd; }
294 HANDLE get_handle () const { return mapping_hdl; }
295 int get_device () { return dev; }
296 int get_prot () const { return prot; }
297 int get_openflags () const { return openflags; }
298 int get_flags () const { return flags; }
299 bool priv () const { return ::priv (flags); }
300 bool fixed () const { return ::fixed (flags); }
301 bool anonymous () const { return ::anonymous (flags); }
302 bool noreserve () const { return ::noreserve (flags); }
303 bool autogrow () const { return ::autogrow (flags); }
304 bool attached () const { return ::attached (prot); }
305 bool filler () const { return ::filler (prot); }
306 off_t get_offset () const { return offset; }
307 SIZE_T get_len () const { return len; }
308 caddr_t get_address () const { return base_address; }
309
310 void init_page_map (mmap_record &r);
311
312 DWORD find_unused_pages (DWORD pages) const;
313 bool match (caddr_t addr, SIZE_T len, caddr_t &m_addr, DWORD &m_len);
314 off_t map_pages (off_t off, SIZE_T len);
315 bool map_pages (caddr_t addr, SIZE_T len);
316 bool unmap_pages (caddr_t addr, SIZE_T len);
317 int access (caddr_t address);
318
319 fhandler_base *alloc_fh ();
320 void free_fh (fhandler_base *fh);
321
322 DWORD gen_create_protect () const
323 { return ::gen_create_protect (get_openflags (), get_flags ()); }
324 DWORD gen_protect () const
325 { return ::gen_protect (get_prot (), get_flags ()); }
326 bool compatible_flags (int fl) const;
327 };
328 #pragma pack(pop)
329
330 class mmap_list
331 {
332 public:
333 LIST_ENTRY (mmap_list) ml_next;
334 LIST_HEAD (, mmap_record) recs;
335
336 private:
337 int fd;
338 ino_t hash;
339
340 public:
341 int get_fd () const { return fd; }
342 ino_t get_hash () const { return hash; }
343
344 bool anonymous () const { return fd == -1; }
345 void set (int nfd, struct stat *st);
346 mmap_record *add_record (mmap_record &r);
347 bool del_record (mmap_record *rec);
348 caddr_t try_map (void *addr, size_t len, int flags, off_t off);
349 };
350
351 class mmap_areas
352 {
353 public:
354 LIST_HEAD (, mmap_list) lists;
355
356 mmap_list *get_list_by_fd (int fd, struct stat *st);
357 mmap_list *add_list (int fd, struct stat *st);
358 void del_list (mmap_list *ml);
359 };
360
361 /* This is the global map structure pointer. */
362 static mmap_areas mmapped_areas;
363
364 bool
365 mmap_record::compatible_flags (int fl) const
366 {
367 #define MAP_COMPATMASK (MAP_TYPE | MAP_NORESERVE)
368 return (get_flags () & MAP_COMPATMASK) == (fl & MAP_COMPATMASK);
369 }
370
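/* Search this record's page_map for a run of `pages' consecutive
   unused pages.  Returns the index of the first page of the run, or
   (DWORD) -1 if no sufficiently large gap exists. */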
371 DWORD
372 mmap_record::find_unused_pages (DWORD pages) const
373 {
374 DWORD mapped_pages = PAGE_CNT (get_len ());
375 DWORD start;
376
377 if (pages > mapped_pages)
378 return (DWORD)-1;
379 for (start = 0; start <= mapped_pages - pages; ++start)
380 if (!MAP_ISSET (start))
381 {
382 DWORD cnt;
383 for (cnt = 0; cnt < pages; ++cnt)
384 if (MAP_ISSET (start + cnt))
385 break;
386 if (cnt >= pages)
387 return start;
388 }
389 return (DWORD)-1;
390 }
391
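/* Compute the intersection of the range [addr, addr + len) with this
   record's mapped area.  If they overlap, return the overlapping range
   in m_addr/m_len and return true. */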
392 bool
393 mmap_record::match (caddr_t addr, SIZE_T len, caddr_t &m_addr, DWORD &m_len)
394 {
395 caddr_t low = (addr >= get_address ()) ? addr : get_address ();
396 caddr_t high = get_address ();
397 if (filler ())
398 high += get_len ();
399 else
400 high += (PAGE_CNT (get_len ()) * wincap.page_size ());
401 high = (addr + len < high) ? addr + len : high;
402 if (low < high)
403 {
404 m_addr = low;
405 m_len = high - low;
406 return true;
407 }
408 return false;
409 }
410
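/* Initialize a freshly allocated record from the temporary record r,
   switch the pages to their real protection if it differs from the
   protection the mapping was created with, and mark all pages as in
   use in the page_map. */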
411 void
412 mmap_record::init_page_map (mmap_record &r)
413 {
414 *this = r;
415 DWORD start_protect = gen_create_protect ();
416 DWORD real_protect = gen_protect ();
417 if (real_protect != start_protect && !noreserve ()
418 && !VirtualProtect (get_address (), get_len (),
419 real_protect, &start_protect))
420 system_printf ("Warning: VirtualProtect (addr: %p, len: %ly, "
421 "new_prot: %y, old_prot: %y), %E",
422 get_address (), get_len (),
423 real_protect, start_protect);
424 SIZE_T len = PAGE_CNT (get_len ());
425 while (len-- > 0)
426 MAP_SET (len);
427 }
428
429 off_t
430 mmap_record::map_pages (off_t off, SIZE_T len)
431 {
432 /* Used ONLY if this mapping matches into the chunk of another already
433 performed mapping in a special case of MAP_ANON|MAP_PRIVATE.
434
435 Otherwise its job is now done by init_page_map(). */
436 DWORD old_prot;
437 debug_printf ("map_pages (fd=%d, off=%Y, len=%lu)", get_fd (), off, len);
438 len = PAGE_CNT (len);
439
440 if ((off = find_unused_pages (len)) == (DWORD)-1)
441 return 0L;
442 if (!noreserve ()
443 && !VirtualProtect (get_address () + off * wincap.page_size (),
444 len * wincap.page_size (), gen_protect (),
445 &old_prot))
446 {
447 __seterrno ();
448 return (off_t)-1;
449 }
450
451 while (len-- > 0)
452 MAP_SET (off + len);
453 return off * wincap.page_size ();
454 }
455
456 bool
457 mmap_record::map_pages (caddr_t addr, SIZE_T len)
458 {
459 debug_printf ("map_pages (addr=%p, len=%lu)", addr, len);
460 DWORD old_prot;
461 DWORD off = addr - get_address ();
462 off /= wincap.page_size ();
463 len = PAGE_CNT (len);
464 /* First check if the area is unused right now. */
465 for (DWORD l = 0; l < len; ++l)
466 if (MAP_ISSET (off + l))
467 {
468 set_errno (EINVAL);
469 return false;
470 }
471 if (!noreserve ()
472 && !VirtualProtect (get_address () + off * wincap.page_size (),
473 len * wincap.page_size (), gen_protect (),
474 &old_prot))
475 {
476 __seterrno ();
477 return false;
478 }
479 for (; len-- > 0; ++off)
480 MAP_SET (off);
481 return true;
482 }
483
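/* Mark the given range as inaccessible (decommit for noreserve maps,
   PAGE_NOACCESS otherwise) and clear the corresponding page_map bits.
   Returns true if no used pages remain in this record, in which case
   the caller may unmap the whole chunk. */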
484 bool
485 mmap_record::unmap_pages (caddr_t addr, SIZE_T len)
486 {
487 DWORD old_prot;
488 DWORD off = addr - get_address ();
489 if (noreserve ()
490 && !VirtualFree (get_address () + off, len, MEM_DECOMMIT))
491 debug_printf ("VirtualFree in unmap_pages () failed, %E");
492 else if (!VirtualProtect (get_address () + off, len, PAGE_NOACCESS,
493 &old_prot))
494 debug_printf ("VirtualProtect in unmap_pages () failed, %E");
495
496 off /= wincap.page_size ();
497 len = PAGE_CNT (len);
498 for (; len-- > 0; ++off)
499 MAP_CLR (off);
500 /* Return TRUE if all pages are freed, which may result in unmapping
501 the whole chunk. */
502 for (len = MAPSIZE (PAGE_CNT (get_len ())); len > 0; )
503 if (page_map[--len])
504 return false;
505 return true;
506 }
507
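/* Return nonzero if address lies within this record and the page
   containing it is currently marked as mapped in the page_map. */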
508 int
509 mmap_record::access (caddr_t address)
510 {
511 if (address < get_address () || address >= get_address () + get_len ())
512 return 0;
513 DWORD off = (address - get_address ()) / wincap.page_size ();
514 return MAP_ISSET (off);
515 }
516
517 fhandler_base *
518 mmap_record::alloc_fh ()
519 {
520 if (anonymous ())
521 {
522 fh_anonymous.set_io_handle (INVALID_HANDLE_VALUE);
523 fh_anonymous.set_access (GENERIC_READ | GENERIC_WRITE | GENERIC_EXECUTE);
524 return &fh_anonymous;
525 }
526
527 /* The file descriptor could have been closed or, even
528 worse, could have been reused for another file before
529 the call to fork(). This requires creating a fhandler
530 of the correct type to be sure to call the method of the
531 correct class. */
532 device fdev;
533 fdev.name = fdev.native = "";
534 fdev.parse (get_device ());
535 fhandler_base *fh = build_fh_dev (fdev);
536 if (fh)
537 fh->set_access (get_openflags ());
538 return fh;
539 }
540
541 void
542 mmap_record::free_fh (fhandler_base *fh)
543 {
544 if (!anonymous ())
545 delete fh;
546 }
547
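/* Allocate a new record (including its trailing page_map bitmap) on
   the cygheap, initialize it from r, and insert it at the head of this
   list.  Returns NULL if the allocation fails. */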
548 mmap_record *
549 mmap_list::add_record (mmap_record &r)
550 {
551 mmap_record *rec = (mmap_record *) ccalloc (HEAP_MMAP,
552 sizeof (mmap_record)
553 + MAPSIZE (PAGE_CNT (r.get_len ())) * sizeof (DWORD), 1);
554 if (!rec)
555 return NULL;
556 rec->init_page_map (r);
557
558 LIST_INSERT_HEAD (&recs, rec, mr_next);
559 return rec;
560 }
561
562 void
563 mmap_list::set (int nfd, struct stat *st)
564 {
565 fd = nfd;
566 if (!anonymous ())
567 {
568 /* The fd isn't sufficient since it could already be the fd of another
569 file. So we use the inode number as evaluated by fstat to identify
570 the file. */
571 hash = st ? st->st_ino : (ino_t) 0;
572 }
573 LIST_INIT (&recs);
574 }
575
576 bool
577 mmap_list::del_record (mmap_record *rec)
578 {
579 LIST_REMOVE (rec, mr_next);
580 cfree (rec);
581 /* Return true if the list is empty which allows the caller to remove
582 this list from the list of lists. */
583 return !LIST_FIRST(&recs);
584 }
585
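/* Check whether the requested mapping can be satisfied by reusing an
   already existing mapping in this list: either by recycling unused
   pages of an anonymous mapping, or, for MAP_FIXED, by remapping an
   unmapped hole of a still active mapping.  Returns the mapped address
   on success, MAP_FAILED on a fatal error, or NULL if no existing
   record matches. */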
586 caddr_t
587 mmap_list::try_map (void *addr, size_t len, int flags, off_t off)
588 {
589 mmap_record *rec;
590
591 if (off == 0 && !fixed (flags))
592 {
593 /* If MAP_FIXED isn't given, check if this mapping matches into the
594 chunk of another already performed mapping. */
595 SIZE_T plen = PAGE_CNT (len);
596 LIST_FOREACH (rec, &recs, mr_next)
597 if (rec->find_unused_pages (plen) != (DWORD) -1)
598 break;
599 if (rec && rec->compatible_flags (flags))
600 {
601 if ((off = rec->map_pages (off, len)) == (off_t) -1)
602 return (caddr_t) MAP_FAILED;
603 return (caddr_t) rec->get_address () + off;
604 }
605 }
606 else if (fixed (flags))
607 {
608 /* If MAP_FIXED is given, test if the requested area is in an
609 unmapped part of a still active mapping. This can happen
610 if a memory region is unmapped and remapped with MAP_FIXED. */
611 caddr_t u_addr;
612 DWORD u_len;
613
614 LIST_FOREACH (rec, &recs, mr_next)
615 if (rec->match ((caddr_t) addr, len, u_addr, u_len))
616 break;
617 if (rec)
618 {
619 if (u_addr > (caddr_t) addr || u_addr + len < (caddr_t) addr + len
620 || !rec->compatible_flags (flags))
621 {
622 /* Partial match only, or access mode doesn't match. */
623 /* FIXME: Handle partial mappings gracefully if adjacent
624 memory is available. */
625 set_errno (EINVAL);
626 return (caddr_t) MAP_FAILED;
627 }
628 if (!rec->map_pages ((caddr_t) addr, len))
629 return (caddr_t) MAP_FAILED;
630 return (caddr_t) addr;
631 }
632 }
633 return NULL;
634 }
635
636 mmap_list *
637 mmap_areas::get_list_by_fd (int fd, struct stat *st)
638 {
639 mmap_list *ml;
640 LIST_FOREACH (ml, &lists, ml_next)
641 {
642 if (fd == -1 && ml->anonymous ())
643 return ml;
644 /* The fd isn't sufficient since it could already be the fd of another
645 file. So we use the inode number as evaluated by fstat to identify
646 the file. */
647 if (fd != -1 && st && ml->get_hash () == st->st_ino)
648 return ml;
649 }
650 return 0;
651 }
652
653 mmap_list *
654 mmap_areas::add_list (int fd, struct stat *st)
655 {
656 mmap_list *ml = (mmap_list *) cmalloc (HEAP_MMAP, sizeof (mmap_list));
657 if (!ml)
658 return NULL;
659 ml->set (fd, st);
660 LIST_INSERT_HEAD (&lists, ml, ml_next);
661 return ml;
662 }
663
664 void
665 mmap_areas::del_list (mmap_list *ml)
666 {
667 LIST_REMOVE (ml, ml_next);
668 cfree (ml);
669 }
670
671 /* This function allows an external function to test if a given memory
672 region is part of an mmapped memory region. */
673 bool
674 is_mmapped_region (caddr_t start_addr, caddr_t end_address)
675 {
676 size_t len = end_address - start_addr;
677
678 LIST_LOCK ();
679 mmap_list *map_list = mmapped_areas.get_list_by_fd (-1, NULL);
680
681 if (!map_list)
682 {
683 LIST_UNLOCK ();
684 return false;
685 }
686
687 mmap_record *rec;
688 caddr_t u_addr;
689 DWORD u_len;
690 bool ret = false;
691
692 LIST_FOREACH (rec, &map_list->recs, mr_next)
693 {
694 if (rec->match (start_addr, len, u_addr, u_len))
695 {
696 ret = true;
697 break;
698 }
699 }
700 LIST_UNLOCK ();
701 return ret;
702 }
703
704 /* This function is called from exception_handler when a segmentation
705 violation has occurred. It should also be called from all Cygwin
706 functions that want to support passing noreserve mmap page addresses
707 to Windows system calls. In that case, it should be called only after
708 a system call indicates that the application buffer passed had an
709 invalid virtual address to avoid any performance impact in non-noreserve
710 cases.
711
712 Check if the address range is all within noreserve mmap regions. If so,
713 call VirtualAlloc to commit the pages and return MMAP_NORESERVE_COMMITED
714 on success. If the page has __PROT_ATTACH (SUSv3 memory protection
715 extension), or if VirtualAlloc fails, return MMAP_RAISE_SIGBUS.
716 Otherwise, return MMAP_NONE if the address range is not covered by an
717 attached or noreserve map.
718
719 On MMAP_NORESERVE_COMMITED, the exception handler should return 0 to
720 allow the application to retry the memory access, or the calling Cygwin
721 function should retry the Windows system call. */
722
723 mmap_region_status
724 mmap_is_attached_or_noreserve (void *addr, size_t len)
725 {
726 mmap_region_status ret = MMAP_NONE;
727
728 LIST_LOCK ();
729 mmap_list *map_list = mmapped_areas.get_list_by_fd (-1, NULL);
730
731 size_t pagesize = wincap.allocation_granularity ();
732 caddr_t start_addr = (caddr_t) rounddown ((uintptr_t) addr, pagesize);
733 len += ((caddr_t) addr - start_addr);
734 len = roundup2 (len, pagesize);
735
736 if (map_list == NULL)
737 goto out;
738
739 mmap_record *rec;
740 caddr_t u_addr;
741 DWORD u_len;
742
743 LIST_FOREACH (rec, &map_list->recs, mr_next)
744 {
745 if (!rec->match (start_addr, len, u_addr, u_len))
746 continue;
747 if (rec->attached ())
748 {
749 ret = MMAP_RAISE_SIGBUS;
750 break;
751 }
752 if (!rec->noreserve ())
753 break;
754
755 size_t commit_len = u_len - (start_addr - u_addr);
756 if (commit_len > len)
757 commit_len = len;
758
759 if (!VirtualAlloc (start_addr, commit_len, MEM_COMMIT,
760 rec->gen_protect ()))
761 {
762 ret = MMAP_RAISE_SIGBUS;
763 break;
764 }
765
766 start_addr += commit_len;
767 len -= commit_len;
768 if (!len)
769 {
770 ret = MMAP_NORESERVE_COMMITED;
771 break;
772 }
773 }
774 out:
775 LIST_UNLOCK ();
776 return ret;
777 }
778
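/* Create the actual mapping via the fhandler's mmap method and record
   it in the bookkeeping lists, creating the per-file mmap_list first if
   necessary.  Returns the mapped base address, or NULL on failure. */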
779 static caddr_t
780 mmap_worker (mmap_list *map_list, fhandler_base *fh, caddr_t base, size_t len,
781 int prot, int flags, int fd, off_t off, struct stat *st)
782 {
783 HANDLE h = fh->mmap (&base, len, prot, flags, off);
784 if (h == INVALID_HANDLE_VALUE)
785 return NULL;
786 if (!map_list
787 && !(map_list = mmapped_areas.get_list_by_fd (fd, st))
788 && !(map_list = mmapped_areas.add_list (fd, st)))
789 {
790 fh->munmap (h, base, len);
791 return NULL;
792 }
793 mmap_record mmap_rec (fd, h, fh->get_access (), prot, flags, off, len, base);
794 mmap_record *rec = map_list->add_record (mmap_rec);
795 if (!rec)
796 {
797 fh->munmap (h, base, len);
798 return NULL;
799 }
800 return base;
801 }
802
803 #ifdef __x86_64__
804
805 /* The memory region used for memory maps */
806 #define MMAP_STORAGE_LOW 0x00800000000L /* Leave 8 Gigs for heap. */
807 #define MMAP_STORAGE_HIGH 0x70000000000L /* Leave enough room for OS. */
808
809 /* FIXME? Unfortunately the OS doesn't support a top down allocation with
810 a ceiling value. The ZeroBits mechanism only works for
811 NtMapViewOfSection and it only evaluates the high bit of ZeroBits
812 on 64 bit, so it's pretty much useless for our purposes.
813
814 If the below simple mechanism to perform top-down allocations
815 turns out to be too dumb, we need something else. One idea is to
816 divide the space into (3835) 4 Gig chunks and simply store the
817 available free space per slot. Then we can go top down from slot
818 to slot and only try slots which are supposed to have enough space.
819 Bookkeeping would be very simple and fast. */
820 class mmap_allocator
821 {
822 caddr_t mmap_current_low;
823
824 public:
825 mmap_allocator () : mmap_current_low ((caddr_t) MMAP_STORAGE_HIGH) {}
826
827 PVOID alloc (PVOID in_addr, SIZE_T in_size, bool fixed)
828 {
829 MEMORY_BASIC_INFORMATION mbi;
830
831 SIZE_T size = roundup2 (in_size, wincap.allocation_granularity ());
832 /* First check for the given address. */
833 if (in_addr)
834 {
835 /* If it points to a free area, big enough to fulfill the request,
836 return the address. */
837 if (VirtualQuery (in_addr, &mbi, sizeof mbi)
838 && mbi.State == MEM_FREE
839 && mbi.RegionSize >= size)
840 return in_addr;
841 /* Otherwise, if MAP_FIXED was given, give up. */
842 if (fixed)
843 return NULL;
844 /* Otherwise, fall through to the usual free space search mechanism. */
845 }
846 /* Start with the last allocation start address - requested size. */
847 caddr_t addr = mmap_current_low - size;
848 bool merry_go_round = false;
849 do
850 {
851 /* Did we hit the lower ceiling? If so, restart from the upper
852 ceiling, but note that we did it. */
853 if (addr < (caddr_t) MMAP_STORAGE_LOW)
854 {
855 addr = (caddr_t) MMAP_STORAGE_HIGH - size;
856 merry_go_round = true;
857 }
858 /* Shouldn't fail, but better test. */
859 if (!VirtualQuery ((PVOID) addr, &mbi, sizeof mbi))
860 return NULL;
861 /* If the region is free... */
862 if (mbi.State == MEM_FREE)
863 {
864 /* ...and the region is big enough to fulfill the request... */
865 if (mbi.RegionSize >= size)
866 {
867 /* ...note the address as next start address for our simple
868 merry-go-round and return the address. */
869 mmap_current_low = addr;
870 return (PVOID) addr;
871 }
872 /* Otherwise, subtract what's missing in size and try again. */
873 addr -= size - mbi.RegionSize;
874 }
875 /* If the region isn't free, skip to address below AllocationBase
876 and try again. */
877 else
878 addr = (caddr_t) mbi.AllocationBase - size;
879 }
880 /* Repeat until we had a full ride on the merry_go_round. */
881 while (!merry_go_round || addr >= mmap_current_low);
882 return NULL;
883 }
884 };
885
886 static mmap_allocator mmap_alloc; /* Inherited by forked child. */
887 #endif
888
889 extern "C" void *
890 mmap64 (void *addr, size_t len, int prot, int flags, int fd, off_t off)
891 {
892 syscall_printf ("addr %p, len %lu, prot %y, flags %y, fd %d, off %Y",
893 addr, len, prot, flags, fd, off);
894
895 caddr_t ret = (caddr_t) MAP_FAILED;
896 fhandler_base *fh = NULL;
897 fhandler_disk_file *fh_disk_file = NULL; /* Used for reopening a disk file
898 when necessary. */
899 mmap_list *map_list = NULL;
900 size_t orig_len = 0;
901 caddr_t base = NULL;
902 struct stat st;
903
904 DWORD pagesize = wincap.allocation_granularity ();
905
906 fh_anonymous.set_io_handle (INVALID_HANDLE_VALUE);
907 fh_anonymous.set_access (GENERIC_READ | GENERIC_WRITE | GENERIC_EXECUTE);
908
909 /* EINVAL error conditions. */
910 if (off % pagesize
911 || ((prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)))
912 || ((flags & MAP_TYPE) != MAP_SHARED
913 && (flags & MAP_TYPE) != MAP_PRIVATE)
914 || (fixed (flags) && ((uintptr_t) addr % pagesize))
915 || !len)
916 {
917 set_errno (EINVAL);
918 goto out;
919 }
920
921 if (!anonymous (flags) && fd != -1)
922 {
923 /* Ensure that fd is open */
924 cygheap_fdget cfd (fd);
925 if (cfd < 0)
926 goto out;
927
928 fh = cfd;
929
930 /* mmap /dev/zero is like MAP_ANONYMOUS. */
931 if (fh->get_device () == FH_ZERO)
932 flags |= MAP_ANONYMOUS;
933
934 /* The autoconf mmap test maps a file of size 1 byte. It then tests
935 every byte of the entire mapped page of 64K for 0-bytes since that's
936 what POSIX requires. The problem is, we can't create that mapping on
937 64 bit systems. The file mapping will be only a single page, 4K, and
938 since 64 bit systems don't support the AT_ROUND_TO_PAGE flag, the
939 remainder of the 64K slot will result in a SEGV when accessed.
940
941 So, what we do here is cheating for the sake of the autoconf test
942 on 64 bit systems. The justification is that there's very likely
943 no application actually utilizing the map beyond EOF, and we know that
944 all bytes beyond EOF are set to 0 anyway. If this test doesn't work
945 on 64 bit systems, it will result in not using mmap at all in a
946 package. But we want mmap to be treated as usable by autoconf,
947 regardless of whether the autoconf test runs on a 32 bit or a 64 bit
948 system.
949
950 Ok, so we know exactly what autoconf is doing. The file is called
951 "conftest.txt", it has a size of 1 byte, the mapping size is the
952 pagesize, the requested protection is PROT_READ | PROT_WRITE, the
953 mapping is MAP_SHARED, the offset is 0.
954
955 If all these requirements are given, we just return an anonymous map.
956 This will help to get over the autoconf test even on 64 bit systems.
957 The tests are ordered for speed. */
958 #ifdef __x86_64__
959 if (1)
960 #else
961 if (wincap.is_wow64 ())
962 #endif
963 {
964 UNICODE_STRING fname;
965 IO_STATUS_BLOCK io;
966 FILE_STANDARD_INFORMATION fsi;
967
968 if (len == pagesize
969 && prot == (PROT_READ | PROT_WRITE)
970 && flags == MAP_SHARED
971 && off == 0
972 && (RtlSplitUnicodePath (fh->pc.get_nt_native_path (), NULL,
973 &fname),
974 wcscmp (fname.Buffer, L"conftest.txt") == 0)
975 && NT_SUCCESS (NtQueryInformationFile (fh->get_handle (), &io,
976 &fsi, sizeof fsi,
977 FileStandardInformation))
978 && fsi.EndOfFile.QuadPart == 1LL)
979 flags |= MAP_ANONYMOUS;
980 }
981 }
982
983 if (anonymous (flags) || fd == -1)
984 {
985 fh = &fh_anonymous;
986 fd = -1;
987 flags |= MAP_ANONYMOUS;
988 /* Anonymous mappings are always forced to pagesize length with
989 no offset. */
990 len = roundup2 (len, pagesize);
991 off = 0;
992 }
993 else if (fh->get_device () == FH_FS)
994 {
995 /* EACCES error conditions according to SUSv3. File must be opened
996 for reading, regardless of the requested protection, and file must
997 be opened for writing when PROT_WRITE together with MAP_SHARED
998 is requested. */
999 if (!(fh->get_access () & GENERIC_READ)
1000 || (!(fh->get_access () & GENERIC_WRITE)
1001 && (prot & PROT_WRITE) && !priv (flags)))
1002 {
1003 set_errno (EACCES);
1004 goto out;
1005 }
1006
1007 /* You can't create mappings with PAGE_EXECUTE protection if
1008 the file isn't explicitly opened with EXECUTE access. */
1009 OBJECT_ATTRIBUTES attr;
1010 NTSTATUS status;
1011 HANDLE h;
1012 IO_STATUS_BLOCK io;
1013
1014 InitializeObjectAttributes (&attr, &ro_u_empty, fh->pc.objcaseinsensitive (),
1015 fh->get_handle (), NULL);
1016 status = NtOpenFile (&h,
1017 fh->get_access () | GENERIC_EXECUTE | SYNCHRONIZE,
1018 &attr, &io, FILE_SHARE_VALID_FLAGS,
1019 FILE_SYNCHRONOUS_IO_NONALERT
1020 | FILE_OPEN_FOR_BACKUP_INTENT);
1021 if (NT_SUCCESS (status))
1022 {
1023 fh_disk_file = new (ccalloc (HEAP_FHANDLER, 1, sizeof *fh_disk_file))
1024 fhandler_disk_file;
1025 fh_disk_file->set_name (fh->pc);
1026 fh_disk_file->set_io_handle (h);
1027 fh_disk_file->set_access (fh->get_access () | GENERIC_EXECUTE);
1028 fh = fh_disk_file;
1029 }
1030 else if (prot & PROT_EXEC)
1031 {
1032 /* TODO: To be or not to be... I'm opting for refusing this
1033 mmap request rather than faking it, but that might break
1034 some non-portable code. */
1035 set_errno (EACCES);
1036 goto out;
1037 }
1038
1039 if (fh->fstat_fs (&st))
1040 {
1041 __seterrno ();
1042 goto out;
1043 }
1044 off_t fsiz = st.st_size;
1045
1046 /* Don't allow file mappings beginning beyond EOF since Windows can't
1047 handle that in a POSIX-like way, unless the MAP_AUTOGROW flag is set, which
1048 mimics Windows behaviour. */
1049 if (off >= fsiz && !autogrow (flags))
1050 {
1051 /* Instead, it seems suitable to return an anonymous mapping of
1052 the given size. Mapped addresses beyond EOF aren't
1053 written back to the file anyway, so the handling is identical
1054 to other pages beyond EOF. */
1055 fh = &fh_anonymous;
1056 len = roundup2 (len, pagesize);
1057 prot = PROT_READ | PROT_WRITE | __PROT_ATTACH;
1058 flags &= MAP_FIXED;
1059 flags |= MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
1060 fd = -1;
1061 off = 0;
1062 goto go_ahead;
1063 }
1064 fsiz -= off;
1065 /* We're creating the pages beyond EOF as reserved, anonymous pages.
1066 Note that this isn't done in 64 bit environments since apparently
1067 64 bit systems don't support the AT_ROUND_TO_PAGE flag, which is
1068 required to get this right. Too bad. */
1069 #ifndef __x86_64__
1070 if (!wincap.is_wow64 ()
1071 && (((off_t) len > fsiz && !autogrow (flags))
1072 || roundup2 (len, wincap.page_size ())
1073 < roundup2 (len, pagesize)))
1074 orig_len = len;
1075 #endif
1076 if ((off_t) len > fsiz)
1077 {
1078 if (autogrow (flags))
1079 {
1080 /* Allow mapping beyond EOF if MAP_AUTOGROW flag is set.
1081 Check if file has been opened for writing, otherwise
1082 MAP_AUTOGROW is invalid. */
1083 if (!(fh->get_access () & GENERIC_WRITE))
1084 {
1085 set_errno (EINVAL);
1086 goto out;
1087 }
1088 }
1089 else
1090 /* Otherwise, don't map beyond EOF, since Windows would change
1091 the file to the new length, in contrast to POSIX. */
1092 len = fsiz;
1093 }
1094
1095 /* If the requested offset + len is <= file size, drop MAP_AUTOGROW.
1096 This simplifies fhandler::mmap's job. */
1097 if (autogrow (flags) && (off + (off_t) len) <= fsiz)
1098 flags &= ~MAP_AUTOGROW;
1099 }
1100
1101 go_ahead:
1102
1103 /* MAP_NORESERVE is only supported on private anonymous mappings.
1104 Remove that bit from flags so that later code doesn't have to
1105 test all bits. */
1106 if (noreserve (flags) && (!anonymous (flags) || !priv (flags)))
1107 flags &= ~MAP_NORESERVE;
1108
1109 LIST_LOCK ();
1110 map_list = mmapped_areas.get_list_by_fd (fd, &st);
1111
1112 /* Test if an existing anonymous mapping can be recycled. */
1113 if (map_list && anonymous (flags))
1114 {
1115 caddr_t tried = map_list->try_map (addr, len, flags, off);
1116 /* try_map returns NULL if no map matched, otherwise it returns
1117 a valid address, or MAP_FAILED in case of a fatal error. */
1118 if (tried)
1119 {
1120 ret = tried;
1121 goto out_with_unlock;
1122 }
1123 }
1124
1125 #ifdef __x86_64__
1126 addr = mmap_alloc.alloc (addr, orig_len ?: len, fixed (flags));
1127 #else
1128 if (orig_len)
1129 {
1130 /* If the requested length is bigger than the file size, we try to
1131 allocate an area of the full size first. This area is immediately
1132 deallocated and the address we got is used as base address for the
1133 subsequent real mappings. This ensures that we have enough space
1134 for the whole thing. */
1135 orig_len = roundup2 (orig_len, pagesize);
1136 PVOID newaddr = VirtualAlloc (addr, orig_len, MEM_TOP_DOWN | MEM_RESERVE,
1137 PAGE_READWRITE);
1138 if (!newaddr)
1139 {
1140 /* If addr is not NULL, but MAP_FIXED isn't given, allow the OS
1141 to choose. */
1142 if (addr && !fixed (flags))
1143 newaddr = VirtualAlloc (NULL, orig_len, MEM_TOP_DOWN | MEM_RESERVE,
1144 PAGE_READWRITE);
1145 if (!newaddr)
1146 {
1147 __seterrno ();
1148 goto out_with_unlock;
1149 }
1150 }
1151 if (!VirtualFree (newaddr, 0, MEM_RELEASE))
1152 {
1153 __seterrno ();
1154 goto out_with_unlock;
1155 }
1156 addr = newaddr;
1157 }
1158 #endif
1159
1160 base = mmap_worker (map_list, fh, (caddr_t) addr, len, prot, flags, fd, off,
1161 &st);
1162 if (!base)
1163 goto out_with_unlock;
1164
1165 if (orig_len)
1166 {
1167 /* If the requested length is bigger than the file size, the
1168 remainder is created as an anonymous mapping. Actually two
1169 mappings are created, first the remainder from the file end to
1170 the next 64K boundary as accessible pages with the same
1171 protection as the file's pages, then as many pages as necessary
1172 to accommodate the requested length, but as reserved pages which
1173 raise a SIGBUS when trying to access them. AT_ROUND_TO_PAGE
1174 and page protection on shared pages are only supported by 32 bit NT,
1175 so don't even try on WOW64. This is accomplished by not setting
1176 orig_len on WOW64 above. */
1177 #if 0
1178 orig_len = roundup2 (orig_len, pagesize);
1179 #endif
1180 len = roundup2 (len, wincap.page_size ());
1181 if (orig_len - len)
1182 {
1183 orig_len -= len;
1184 size_t valid_page_len = orig_len % pagesize;
1185 size_t sigbus_page_len = orig_len - valid_page_len;
1186
1187 caddr_t at_base = base + len;
1188 if (valid_page_len)
1189 {
1190 prot |= __PROT_FILLER;
1191 flags &= MAP_SHARED | MAP_PRIVATE;
1192 flags |= MAP_ANONYMOUS | MAP_FIXED;
1193 at_base = mmap_worker (NULL, &fh_anonymous, at_base,
1194 valid_page_len, prot, flags, -1, 0, NULL);
1195 if (!at_base)
1196 {
1197 fh->munmap (fh->get_handle (), base, len);
1198 set_errno (ENOMEM);
1199 goto out_with_unlock;
1200 }
1201 at_base += valid_page_len;
1202 }
1203 if (sigbus_page_len)
1204 {
1205 prot = PROT_READ | PROT_WRITE | __PROT_ATTACH;
1206 flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED;
1207 at_base = mmap_worker (NULL, &fh_anonymous, at_base,
1208 sigbus_page_len, prot, flags, -1, 0, NULL);
1209 if (!at_base)
1210 debug_printf ("Warning: Mapping beyond EOF failed, %E");
1211 }
1212 }
1213 }
1214
1215 ret = base;
1216
1217 out_with_unlock:
1218 LIST_UNLOCK ();
1219
1220 out:
1221
1222 if (fh_disk_file)
1223 {
1224 NtClose (fh_disk_file->get_handle ());
1225 delete fh;
1226 }
1227
1228 syscall_printf ("%p = mmap() ", ret);
1229 return ret;
1230 }
1231
1232 #ifdef __x86_64__
1233 EXPORT_ALIAS (mmap64, mmap)
1234 #else
1235 extern "C" void *
1236 mmap (void *addr, size_t len, int prot, int flags, int fd, _off_t off)
1237 {
1238 return mmap64 (addr, len, prot, flags, fd, (off_t)off);
1239 }
1240 #endif
1241
1242 /* munmap () removes all mmapped pages between addr and addr+len. */
1243
1244 extern "C" int
1245 munmap (void *addr, size_t len)
1246 {
1247 syscall_printf ("munmap (addr %p, len %lu)", addr, len);
1248
1249 /* Error conditions according to SUSv3 */
1250 if (!addr || !len || check_invalid_virtual_addr (addr, len))
1251 {
1252 set_errno (EINVAL);
1253 return -1;
1254 }
1255 size_t pagesize = wincap.allocation_granularity ();
1256 if (((uintptr_t) addr % pagesize) || !len)
1257 {
1258 set_errno (EINVAL);
1259 return -1;
1260 }
1261 len = roundup2 (len, pagesize);
1262
1263 LIST_LOCK ();
1264
1265 /* Iterate through the map, unmap pages between addr and addr+len
1266 in all maps. */
1267 mmap_list *map_list, *next_map_list;
1268 LIST_FOREACH_SAFE (map_list, &mmapped_areas.lists, ml_next, next_map_list)
1269 {
1270 mmap_record *rec, *next_rec;
1271 caddr_t u_addr;
1272 DWORD u_len;
1273
1274 LIST_FOREACH_SAFE (rec, &map_list->recs, mr_next, next_rec)
1275 {
1276 if (!rec->match ((caddr_t) addr, len, u_addr, u_len))
1277 continue;
1278 if (rec->unmap_pages (u_addr, u_len))
1279 {
1280 /* The whole record has been unmapped, so we now actually
1281 unmap it from the system in full length... */
1282 fhandler_base *fh = rec->alloc_fh ();
1283 fh->munmap (rec->get_handle (),
1284 rec->get_address (),
1285 rec->get_len ());
1286 rec->free_fh (fh);
1287
1288 /* ...and delete the record. */
1289 if (map_list->del_record (rec))
1290 {
1291 /* Yay, the last record has been removed from the list,
1292 we can remove the list now, too. */
1293 mmapped_areas.del_list (map_list);
1294 break;
1295 }
1296 }
1297 }
1298 }
1299
1300 LIST_UNLOCK ();
1301 syscall_printf ("0 = munmap(): %p", addr);
1302 return 0;
1303 }
1304
1305 /* Sync file with memory. Ignore flags for now. */
1306
1307 extern "C" int
1308 msync (void *addr, size_t len, int flags)
1309 {
1310 int ret = -1;
1311 mmap_list *map_list;
1312
1313 syscall_printf ("msync (addr: %p, len %lu, flags %y)", addr, len, flags);
1314
1315 pthread_testcancel ();
1316
1317 LIST_LOCK ();
1318
1319 if (((uintptr_t) addr % wincap.allocation_granularity ())
1320 || (flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE))
1321 || ((flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC)))
1322 {
1323 set_errno (EINVAL);
1324 goto out;
1325 }
1326 #if 0 /* If I only knew why I did that... */
1327 len = roundup2 (len, wincap.allocation_granularity ());
1328 #endif
1329
1330 /* Iterate through the map, looking for the mmapped area.
1331 Error if not found. */
1332 LIST_FOREACH (map_list, &mmapped_areas.lists, ml_next)
1333 {
1334 mmap_record *rec;
1335 LIST_FOREACH (rec, &map_list->recs, mr_next)
1336 {
1337 if (rec->access ((caddr_t) addr))
1338 {
1339 /* Check whole area given by len. */
1340 for (DWORD i = wincap.allocation_granularity ();
1341 i < len;
1342 i += wincap.allocation_granularity ())
1343 if (!rec->access ((caddr_t) addr + i))
1344 {
1345 set_errno (ENOMEM);
1346 goto out;
1347 }
1348 fhandler_base *fh = rec->alloc_fh ();
1349 ret = fh->msync (rec->get_handle (), (caddr_t) addr, len, flags);
1350 rec->free_fh (fh);
1351 goto out;
1352 }
1353 }
1354 }
1355
1356 /* No matching mapping exists. */
1357 set_errno (ENOMEM);
1358
1359 out:
1360 LIST_UNLOCK ();
1361 syscall_printf ("%R = msync()", ret);
1362 return ret;
1363 }
1364
1365 /* Set memory protection */
1366
1367 extern "C" int
1368 mprotect (void *addr, size_t len, int prot)
1369 {
1370 bool in_mapped = false;
1371 bool ret = false;
1372 DWORD old_prot;
1373 DWORD new_prot = 0;
1374
1375 syscall_printf ("mprotect (addr: %p, len %lu, prot %y)", addr, len, prot);
1376
1377 /* See comment in mmap64 for a description. */
1378 size_t pagesize = wincap.allocation_granularity ();
1379 if ((uintptr_t) addr % pagesize)
1380 {
1381 set_errno (EINVAL);
1382 goto out;
1383 }
1384 len = roundup2 (len, pagesize);
1385
1386 LIST_LOCK ();
1387
1388 /* Iterate through the map, protect pages between addr and addr+len
1389 in all maps. */
1390 mmap_list *map_list;
1391 LIST_FOREACH (map_list, &mmapped_areas.lists, ml_next)
1392 {
1393 mmap_record *rec;
1394 caddr_t u_addr;
1395 DWORD u_len;
1396
1397 LIST_FOREACH (rec, &map_list->recs, mr_next)
1398 {
1399 if (!rec->match ((caddr_t) addr, len, u_addr, u_len))
1400 continue;
1401 in_mapped = true;
1402 if (rec->attached ())
1403 continue;
1404 new_prot = gen_protect (prot, rec->get_flags ());
1405 if (rec->noreserve ())
1406 {
1407 if (new_prot == PAGE_NOACCESS)
1408 ret = VirtualFree (u_addr, u_len, MEM_DECOMMIT);
1409 else
1410 ret = !!VirtualAlloc (u_addr, u_len, MEM_COMMIT, new_prot);
1411 }
1412 else
1413 ret = VirtualProtect (u_addr, u_len, new_prot, &old_prot);
1414 if (!ret)
1415 {
1416 __seterrno ();
1417 break;
1418 }
1419 }
1420 }
1421
1422 LIST_UNLOCK ();
1423
1424 if (!in_mapped)
1425 {
1426 int flags = 0;
1427 MEMORY_BASIC_INFORMATION mbi;
1428
1429 ret = VirtualQuery (addr, &mbi, sizeof mbi);
1430 if (ret)
1431 {
1432 /* If write protection is requested, check if the page was
1433 originally protected writecopy. In this case call VirtualProtect
1434 requesting PAGE_WRITECOPY, otherwise the VirtualProtect will fail
1435 on NT version >= 5.0 */
1436 if (prot & PROT_WRITE)
1437 {
1438 if (mbi.AllocationProtect == PAGE_WRITECOPY
1439 || mbi.AllocationProtect == PAGE_EXECUTE_WRITECOPY)
1440 flags = MAP_PRIVATE;
1441 }
1442 new_prot = gen_protect (prot, flags);
1443 if (new_prot != PAGE_NOACCESS && mbi.State == MEM_RESERVE)
1444 ret = VirtualAlloc (addr, len, MEM_COMMIT, new_prot);
1445 else
1446 ret = VirtualProtect (addr, len, new_prot, &old_prot);
1447 }
1448 if (!ret)
1449 __seterrno ();
1450 }
1451
1452 out:
1453
1454 syscall_printf ("%R = mprotect ()", ret ? 0 : -1);
1455 return ret ? 0 : -1;
1456 }
1457
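/* Lock the given address range into physical memory.  Windows accounts
   locked pages against the process working set, so on
   STATUS_WORKING_SET_QUOTA the working set is enlarged and the call is
   retried (see the comment below). */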
1458 extern "C" int
1459 mlock (const void *addr, size_t len)
1460 {
1461 int ret = -1;
1462
1463 /* Align address and length values to page size. */
1464 size_t pagesize = wincap.allocation_granularity ();
1465 PVOID base = (PVOID) rounddown((uintptr_t) addr, pagesize);
1466 SIZE_T size = roundup2 (((uintptr_t) addr - (uintptr_t) base) + len,
1467 pagesize);
1468 NTSTATUS status = 0;
1469 do
1470 {
1471 status = NtLockVirtualMemory (NtCurrentProcess (), &base, &size,
1472 MAP_PROCESS);
1473 if (status == STATUS_WORKING_SET_QUOTA)
1474 {
1475 /* The working set is too small, try to increase it so that the
1476 requested locking region fits in. Unfortunately I don't know
1477 any function which would return the currently locked pages of
1478 a process (no go with NtQueryVirtualMemory).
1479
1480 So, except for the border cases, what we do here is something
1481 really embarrassing. We raise the working set by 64K at a time
1482 and retry, until either we fail to raise the working set size
1483 further, or until NtLockVirtualMemory returns successfully (or
1484 with another error). */
1485 SIZE_T min, max;
1486 if (!GetProcessWorkingSetSize (GetCurrentProcess (), &min, &max))
1487 {
1488 set_errno (ENOMEM);
1489 break;
1490 }
1491 if (min < size)
1492 min = size + pagesize;
1493 else if (size < pagesize)
1494 min += size;
1495 else
1496 min += pagesize;
1497 if (max < min)
1498 max = min;
1499 if (!SetProcessWorkingSetSize (GetCurrentProcess (), min, max))
1500 {
1501 set_errno (ENOMEM);
1502 break;
1503 }
1504 }
1505 else if (!NT_SUCCESS (status))
1506 __seterrno_from_nt_status (status);
1507 else
1508 ret = 0;
1509 }
1510 while (status == STATUS_WORKING_SET_QUOTA);
1511
1512 syscall_printf ("%R = mlock(%p, %lu)", ret, addr, len);
1513 return ret;
1514 }
1515
1516 extern "C" int
1517 munlock (const void *addr, size_t len)
1518 {
1519 int ret = -1;
1520
1521 /* Align address and length values to page size. */
1522 size_t pagesize = wincap.allocation_granularity ();
1523 PVOID base = (PVOID) rounddown((uintptr_t) addr, pagesize);
1524 SIZE_T size = roundup2 (((uintptr_t) addr - (uintptr_t) base) + len,
1525 pagesize);
1526 NTSTATUS status = NtUnlockVirtualMemory (NtCurrentProcess (), &base, &size,
1527 MAP_PROCESS);
1528 if (!NT_SUCCESS (status))
1529 __seterrno_from_nt_status (status);
1530 else
1531 ret = 0;
1532
1533 syscall_printf ("%R = munlock(%p, %lu)", ret, addr, len);
1534 return ret;
1535 }
1536
1537 extern "C" int
1538 posix_madvise (void *addr, size_t len, int advice)
1539 {
1540 int ret;
1541 /* Check parameters. */
1542 if (advice < POSIX_MADV_NORMAL || advice > POSIX_MADV_DONTNEED
1543 || !len)
1544 ret = EINVAL;
1545 else
1546 {
1547 /* Check requested memory area. */
1548 MEMORY_BASIC_INFORMATION m;
1549 char *p = (char *) addr;
1550 char *endp = p + len;
1551 while (p < endp)
1552 {
1553 if (!VirtualQuery (p, &m, sizeof m) || m.State == MEM_FREE)
1554 {
1555 ret = ENOMEM;
1556 break;
1557 }
1558 p = (char *) m.BaseAddress + m.RegionSize;
1559 }
1560 ret = 0;
1561 }
1562
1563 syscall_printf ("%d = posix_madvise(%p, %lu, %d)", ret, addr, len, advice);
1564 /* Eventually do nothing. */
1565 return 0;
1566 }
1567
1568 /*
1569 * Base implementation:
1570 *
1571 * `mmap' returns ENODEV as documented in SUSv2.
1572 * In contrast to the global function implementation, the member function
1573 * `mmap' has to return the mapped base address in `addr' and the handle to
1574 * the mapping object as return value. In case of failure, the fhandler
1575 * mmap has to close that handle by itself and return INVALID_HANDLE_VALUE.
1576 *
1577 * `munmap' and `msync' get the handle to the mapping object as first parameter
1578 * additionally.
1579 */
1580 HANDLE
1581 fhandler_base::mmap (caddr_t *addr, size_t len, int prot,
1582 int flags, off_t off)
1583 {
1584 set_errno (ENODEV);
1585 return INVALID_HANDLE_VALUE;
1586 }
1587
1588 int
1589 fhandler_base::munmap (HANDLE h, caddr_t addr, size_t len)
1590 {
1591 set_errno (ENODEV);
1592 return -1;
1593 }
1594
1595 int
1596 fhandler_base::msync (HANDLE h, caddr_t addr, size_t len, int flags)
1597 {
1598 set_errno (ENODEV);
1599 return -1;
1600 }
1601
1602 bool
1603 fhandler_base::fixup_mmap_after_fork (HANDLE h, int prot, int flags,
1604 off_t offset, DWORD size,
1605 void *address)
1606 {
1607 set_errno (ENODEV);
1608 return -1;
1609 }
1610
1611 /* Implementation for anonymous maps. Using fhandler_dev_zero looks
1612 like the natural way. */
1613 HANDLE
1614 fhandler_dev_zero::mmap (caddr_t *addr, size_t len, int prot,
1615 int flags, off_t off)
1616 {
1617 HANDLE h;
1618 void *base;
1619
1620 if (priv (flags) && !filler (prot))
1621 {
1622 /* Private anonymous maps are now implemented using VirtualAlloc.
1623 This has two advantages:
1624
1625 - VirtualAlloc has a smaller footprint than a copy-on-write
1626 anonymous map.
1627
1628 - It supports decommitting using VirtualFree, in contrast to
1629 section maps. This allows minimum footprint private maps,
1630 when using the (non-POSIX, yay-Linux) MAP_NORESERVE flag.
1631 */
1632 DWORD protect = gen_protect (prot, flags);
1633 DWORD alloc_type = MEM_TOP_DOWN | MEM_RESERVE
1634 | (noreserve (flags) ? 0 : MEM_COMMIT);
1635 base = VirtualAlloc (*addr, len, alloc_type, protect);
1636 if (!base && addr && !fixed (flags))
1637 base = VirtualAlloc (NULL, len, alloc_type, protect);
1638 if (!base || (fixed (flags) && base != *addr))
1639 {
1640 if (!base)
1641 __seterrno ();
1642 else
1643 {
1644 VirtualFree (base, 0, MEM_RELEASE);
1645 set_errno (EINVAL);
1646 debug_printf ("VirtualAlloc: address shift with MAP_FIXED given");
1647 }
1648 return INVALID_HANDLE_VALUE;
1649 }
1650 h = (HANDLE) 1; /* Fake handle to indicate success. */
1651 }
1652 else
1653 {
1654 h = CreateMapping (get_handle (), len, off, get_access (), prot, flags);
1655 if (!h)
1656 {
1657 __seterrno ();
1658 debug_printf ("CreateMapping failed with %E");
1659 return INVALID_HANDLE_VALUE;
1660 }
1661
1662 base = MapView (h, *addr, len, get_access(), prot, flags, off);
1663 if (!base || (fixed (flags) && base != *addr))
1664 {
1665 if (!base)
1666 __seterrno ();
1667 else
1668 {
1669 NtUnmapViewOfSection (NtCurrentProcess (), base);
1670 set_errno (EINVAL);
1671 debug_printf ("MapView: address shift with MAP_FIXED given");
1672 }
1673 NtClose (h);
1674 return INVALID_HANDLE_VALUE;
1675 }
1676 }
1677 *addr = (caddr_t) base;
1678 return h;
1679 }
1680
1681 int
1682 fhandler_dev_zero::munmap (HANDLE h, caddr_t addr, size_t len)
1683 {
1684 if (h == (HANDLE) 1) /* See fhandler_dev_zero::mmap. */
1685 VirtualFree (addr, 0, MEM_RELEASE);
1686 else
1687 {
1688 NtUnmapViewOfSection (NtCurrentProcess (), addr);
1689 NtClose (h);
1690 }
1691 return 0;
1692 }
1693
1694 int
1695 fhandler_dev_zero::msync (HANDLE h, caddr_t addr, size_t len, int flags)
1696 {
1697 return 0;
1698 }
1699
1700 bool
1701 fhandler_dev_zero::fixup_mmap_after_fork (HANDLE h, int prot, int flags,
1702 off_t offset, DWORD size,
1703 void *address)
1704 {
1705 /* Re-create the map */
1706 void *base;
1707 if (priv (flags) && !filler (prot))
1708 {
1709 DWORD alloc_type = MEM_RESERVE | (noreserve (flags) ? 0 : MEM_COMMIT);
1710 /* Always allocate R/W so that ReadProcessMemory doesn't fail
1711 due to a non-writable target address. The protection is
1712 set to the correct one anyway in the fixup loop. */
1713 base = VirtualAlloc (address, size, alloc_type, PAGE_READWRITE);
1714 }
1715 else
1716 base = MapView (h, address, size, get_access (), prot, flags, offset);
1717 if (base != address)
1718 {
1719 MEMORY_BASIC_INFORMATION m;
1720 VirtualQuery (address, &m, sizeof (m));
1721 system_printf ("requested %p != %p mem alloc base %p, state %y, "
1722 "size %lu, %E", address, base, m.AllocationBase, m.State,
1723 m.RegionSize);
1724 }
1725 return base == address;
1726 }
1727
1728 /* Implementation for disk files and anonymous mappings. */
1729 HANDLE
1730 fhandler_disk_file::mmap (caddr_t *addr, size_t len, int prot,
1731 int flags, off_t off)
1732 {
1733 HANDLE h = CreateMapping (get_handle (), len, off, get_access (),
1734 prot, flags);
1735 if (!h)
1736 {
1737 __seterrno ();
1738 debug_printf ("CreateMapping failed with %E");
1739 return INVALID_HANDLE_VALUE;
1740 }
1741
1742 void *base = MapView (h, *addr, len, get_access (), prot, flags, off);
1743 if (!base || (fixed (flags) && base != *addr))
1744 {
1745 if (!base)
1746 __seterrno ();
1747 else
1748 {
1749 NtUnmapViewOfSection (NtCurrentProcess (), base);
1750 set_errno (EINVAL);
1751 debug_printf ("MapView: address shift with MAP_FIXED given");
1752 }
1753 NtClose (h);
1754 return INVALID_HANDLE_VALUE;
1755 }
1756
1757 *addr = (caddr_t) base;
1758 return h;
1759 }
1760
1761 int
1762 fhandler_disk_file::munmap (HANDLE h, caddr_t addr, size_t len)
1763 {
1764 NtUnmapViewOfSection (NtCurrentProcess (), addr);
1765 NtClose (h);
1766 return 0;
1767 }
1768
1769 int
1770 fhandler_disk_file::msync (HANDLE h, caddr_t addr, size_t len, int flags)
1771 {
1772 const int retry = 100;
1773 /* The wisdom of google tells us that FlushViewOfFile may fail with
1774 ERROR_LOCK_VIOLATION "if the memory system is writing dirty
1775 pages to disk". And, we've seen reports of this happening in the
1776 cygwin list. So retry 99 times and hope we get lucky. */
1777 for (int i = 0; i < retry; i++)
1778 if (FlushViewOfFile (addr, len))
1779 {
1780 /* FlushViewOfFile just triggers the action and returns immediately,
1781 so it's equivalent to MS_ASYNC. MS_SYNC requires another call to
1782 FlushFileBuffers. */
1783 if (flags & MS_SYNC)
1784 FlushFileBuffers (h);
1785 return 0;
1786 }
1787 else if (GetLastError () != ERROR_LOCK_VIOLATION)
1788 break;
1789 else if (i < (retry - 1))
1790 yield ();
1791
1792 __seterrno ();
1793 return -1;
1794 }
1795
1796 bool
1797 fhandler_disk_file::fixup_mmap_after_fork (HANDLE h, int prot, int flags,
1798 off_t offset, DWORD size,
1799 void *address)
1800 {
1801 /* Re-create the map */
1802 void *base = MapView (h, address, size, get_access (), prot, flags, offset);
1803 if (base != address)
1804 {
1805 MEMORY_BASIC_INFORMATION m;
1806 VirtualQuery (address, &m, sizeof (m));
1807 system_printf ("requested %p != %p mem alloc base %p, state %y, "
1808 "size %lu, %E", address, base, m.AllocationBase, m.State,
1809 m.RegionSize);
1810 }
1811 return base == address;
1812 }
1813
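/* Implementation for /dev/mem.  The mapping is a view of the NT
   section named by ro_u_pmem (presumably the physical memory section),
   opened read-only or read/write depending on the requested
   protection. */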
1814 HANDLE
1815 fhandler_dev_mem::mmap (caddr_t *addr, size_t len, int prot,
1816 int flags, off_t off)
1817 {
1818 if (off >= (off_t) mem_size
1819 || len >= mem_size
1820 || off + len >= mem_size)
1821 {
1822 set_errno (EINVAL);
1823 debug_printf ("-1 = mmap(): illegal parameter, set EINVAL");
1824 return INVALID_HANDLE_VALUE;
1825 }
1826
1827 OBJECT_ATTRIBUTES attr;
1828 InitializeObjectAttributes (&attr, &ro_u_pmem,
1829 OBJ_CASE_INSENSITIVE | OBJ_INHERIT,
1830 NULL, NULL);
1831
1832 /* Section access is bitwise OR'ed, while on the Win32 level access
1833 is only one of the values. It's not quite clear if the section
1834 access has to be defined this way, or if SECTION_ALL_ACCESS would
1835 be sufficient, but this worked fine so far, so why change? */
1836 ACCESS_MASK section_access;
1837 if (prot & PROT_WRITE)
1838 section_access = SECTION_MAP_READ | SECTION_MAP_WRITE;
1839 else
1840 section_access = SECTION_MAP_READ;
1841
1842 HANDLE h;
1843 NTSTATUS status = NtOpenSection (&h, section_access, &attr);
1844 if (!NT_SUCCESS (status))
1845 {
1846 __seterrno_from_nt_status (status);
1847 debug_printf ("-1 = mmap(): NtOpenSection failed with %E");
1848 return INVALID_HANDLE_VALUE;
1849 }
1850
1851 void *base = MapView (h, *addr, len, get_access (), prot,
1852 flags | MAP_ANONYMOUS, off);
1853 if (!base || (fixed (flags) && base != *addr))
1854 {
1855 if (!base)
1856 __seterrno ();
1857 else
1858 {
1859 NtUnmapViewOfSection (NtCurrentProcess (), base);
1860 set_errno (EINVAL);
1861 debug_printf ("MapView: address shift with MAP_FIXED given");
1862 }
1863 NtClose (h);
1864 return INVALID_HANDLE_VALUE;
1865 }
1866
1867 *addr = (caddr_t) base;
1868 return h;
1869 }
1870
1871 int
1872 fhandler_dev_mem::munmap (HANDLE h, caddr_t addr, size_t len)
1873 {
1874 NTSTATUS status;
1875 if (!NT_SUCCESS (status = NtUnmapViewOfSection (NtCurrentProcess (), addr)))
1876 {
1877 __seterrno_from_nt_status (status);
1878 return -1;
1879 }
1880 NtClose (h);
1881 return 0;
1882 }
1883
1884 int
1885 fhandler_dev_mem::msync (HANDLE h, caddr_t addr, size_t len, int flags)
1886 {
1887 return 0;
1888 }
1889
1890 bool
1891 fhandler_dev_mem::fixup_mmap_after_fork (HANDLE h, int prot, int flags,
1892 off_t offset, DWORD size,
1893 void *address)
1894 {
1895 void *base = MapView (h, address, size, get_access (), prot,
1896 flags | MAP_ANONYMOUS, offset);
1897 if (base != address)
1898 {
1899 MEMORY_BASIC_INFORMATION m;
1900 VirtualQuery (address, &m, sizeof (m));
1901 system_printf ("requested %p != %p mem alloc base %p, state %y, "
1902 "size %lu, %E", address, base, m.AllocationBase, m.State,
1903 m.RegionSize);
1904 }
1905 return base == address;
1906 }
1907
1908 /* Call to re-create all the file mappings in a forked child. Called from
1909 the child in initialization. At this point we are passed a valid
1910 mmapped_areas map, and all the HANDLE's are valid for the child, but
1911 none of the mapped areas are in our address space. We need to iterate
1912 through the map, doing the MapViewOfFile calls. */
1913
1914 int __stdcall
1915 fixup_mmaps_after_fork (HANDLE parent)
1916 {
1917 /* Iterate through the map */
1918 mmap_list *map_list;
1919 LIST_FOREACH (map_list, &mmapped_areas.lists, ml_next)
1920 {
1921 mmap_record *rec;
1922 LIST_FOREACH (rec, &map_list->recs, mr_next)
1923 {
1924 debug_printf ("fd %d, h %p, address %p, len %ly, prot: %y, "
1925 "flags: %y, offset %Y",
1926 rec->get_fd (), rec->get_handle (), rec->get_address (),
1927 rec->get_len (), rec->get_prot (), rec->get_flags (),
1928 rec->get_offset ());
1929
1930 fhandler_base *fh = rec->alloc_fh ();
1931 bool ret = fh->fixup_mmap_after_fork (rec->get_handle (),
1932 rec->get_prot (),
1933 rec->get_flags () | MAP_FIXED,
1934 rec->get_offset (),
1935 rec->get_len (),
1936 rec->get_address ());
1937 rec->free_fh (fh);
1938
1939 if (!ret)
1940 {
1941 if (rec->attached ())
1942 {
1943 system_printf ("Warning: Fixup mapping beyond EOF failed");
1944 continue;
1945 }
1946 return -1;
1947 }
1948
1949 MEMORY_BASIC_INFORMATION mbi;
1950 DWORD old_prot;
1951
1952 for (char *address = rec->get_address ();
1953 address < rec->get_address () + rec->get_len ();
1954 address += mbi.RegionSize)
1955 {
1956 if (!VirtualQueryEx (parent, address, &mbi, sizeof mbi))
1957 {
1958 system_printf ("VirtualQueryEx failed for MAP_PRIVATE "
1959 "address %p, %E", address);
1960 return -1;
1961 }
1962 /* Just skip reserved pages. */
1963 if (mbi.State == MEM_RESERVE)
1964 continue;
1965 /* Copy-on-write pages must be copied to the child to circumvent
1966 a strange notion of how copy-on-write is supposed to work. */
1967 if (rec->priv ())
1968 {
1969 if (rec->noreserve ()
1970 && !VirtualAlloc (address, mbi.RegionSize,
1971 MEM_COMMIT, PAGE_READWRITE))
1972 {
1973 system_printf ("VirtualAlloc failed for MAP_PRIVATE "
1974 "address %p, %E", address);
1975 return -1;
1976 }
1977 if (mbi.Protect == PAGE_NOACCESS
1978 && !VirtualProtectEx (parent, address, mbi.RegionSize,
1979 PAGE_READONLY, &old_prot))
1980 {
1981 system_printf ("VirtualProtectEx failed for MAP_PRIVATE "
1982 "address %p, %E", address);
1983 return -1;
1984 }
1985 else if ((mbi.AllocationProtect == PAGE_WRITECOPY
1986 || mbi.AllocationProtect == PAGE_EXECUTE_WRITECOPY)
1987 && (mbi.Protect == PAGE_READWRITE
1988 || mbi.Protect == PAGE_EXECUTE_READWRITE))
1989 /* A WRITECOPY page which has been written to is set to
1990 READWRITE, but that's an incompatible protection to
1991 set the page to. Convert the protection to WRITECOPY
1992 so that the below VirtualProtect doesn't fail. */
1993 mbi.Protect <<= 1;
1994
1995 if (!ReadProcessMemory (parent, address, address,
1996 mbi.RegionSize, NULL))
1997 {
1998 system_printf ("ReadProcessMemory failed for MAP_PRIVATE "
1999 "address %p, %E", address);
2000 return -1;
2001 }
2002 if (mbi.Protect == PAGE_NOACCESS
2003 && !VirtualProtectEx (parent, address, mbi.RegionSize,
2004 PAGE_NOACCESS, &old_prot))
2005 {
2006 system_printf ("WARNING: VirtualProtectEx to return to "
2007 "PAGE_NOACCESS state in parent failed for "
2008 "MAP_PRIVATE address %p, %E", address);
2009 return -1;
2010 }
2011 }
2012 /* Set child page protection to parent protection */
2013 if (!VirtualProtect (address, mbi.RegionSize,
2014 mbi.Protect, &old_prot))
2015 {
2016 MEMORY_BASIC_INFORMATION m;
2017 VirtualQuery (address, &m, sizeof m);
2018 system_printf ("VirtualProtect failed for "
2019 "address %p, "
2020 "parentstate: %y, "
2021 "state: %y, "
2022 "parentprot: %y, "
2023 "prot: %y, %E",
2024 address, mbi.State, m.State,
2025 mbi.Protect, m.Protect);
2026 return -1;
2027 }
2028 }
2029 }
2030 }
2031
2032 debug_printf ("succeeded");
2033 return 0;
2034 }