procs_windows_amd64.asm 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  ;; Assemble everything below as 64-bit code.
  bits 64

  ;; Symbols made visible to the linker.
  global  _fltused
  global  _tls_index
  global  __chkstk

  section .data

  ;; Exported 32-bit data symbols.
  ;; NOTE(review): presumably these are values an MSVC-style toolchain expects
  ;; the runtime to define -- confirm before changing either of them.
  _tls_index:     dd 0
  _fltused:       dd 0x9875

  section .text
  9. ; NOTE(flysand): A call to __chkstk is emitted by the compiler
  10. ; whenever we allocate more than a page of stack at once (e.g. a
  11. ; large array). The reason is that the OS doesn't map the
  12. ; whole stack into memory all at once, but does so page-by-page.
  13. ; When the next page is touched, the CPU generates a page fault,
  14. ; which *the OS* handles by allocating the next page of the
  15. ; stack, until we reach the stack size limit.
  16. ;
  17. ; This page is called the guard page, touching it will extend
  18. ; the size of the stack and overwrite the stack limit in the TEB.
  19. ;
  20. ; If we allocate a large enough array and start writing from the
  21. ; bottom of it, it's possible that we may start touching
  22. ; non-contiguous pages which are unmapped. The OS only maps a stack
  23. ; page into memory if the page above it is already mapped.
  24. ;
  25. ; Therefore the compilers insert this routine, the sole purpose
  26. ; of which is to step through the stack starting from the RSP
  27. ; down to the new RSP after allocation, and touch every page
  28. ; of the new allocation so that the stack is fully mapped for
  29. ; the new allocation
  30. ;
  31. ; I got this code by disassembling the output of MSVC a long
  32. ; time ago. I don't remember if I've cleaned it up, but it definitely
  33. ; stinks.
  34. ;
  35. ; Additional notes:
  36. ; RAX (passed as parameter) holds the allocation's size
  37. ; GS:[0x10] references the current stack limit
  38. ; (i.e. bottom of the stack (i.e. lowest address accessible))
  39. ;
  40. ; Also this stuff is windows-only kind of thing, because linux people
  41. ; didn't think stack that grows is cool enough for them, but the kernel
  42. ; totally supports this kind of stack.
  ;-----------------------------------------------------------------------
  ; __chkstk -- touch every stack page between the current stack limit and
  ;             the bottom of an allocation the caller is about to make.
  ;
  ; Syntax: NASM, 64-bit. Target: Windows x64.
  ; In:     rax = size in bytes of the upcoming stack allocation
  ; Out:    nothing; rax is left untouched and rsp is NOT adjusted here
  ; Saved:  r10, r11 (spilled on entry, reloaded before returning)
  ; Clobb:  flags
  ; Reads:  [gs:0x10], the current stack limit (lowest accessible address)
  ; Stack:  16 bytes of scratch below the return address
  ;-----------------------------------------------------------------------
  %define CHKSTK_PAGE_SIZE    0x1000      ; the stack is probed one page at a time
  %define CHKSTK_STACK_LIMIT  0x10        ; gs-relative offset of the stack limit

  __chkstk:
          ;; r10 and r11 are used as scratch below, so spill them first.
          sub     rsp, 0x10
          mov     [rsp], r10
          mov     [rsp + 0x8], r11

          ;; r10 = rsp as it was just before the call into this routine:
          ;; skip the 16-byte spill area and the 8-byte return address.
          lea     r10, [rsp + 0x18]

          ;; r10 = lowest address of the requested allocation. If the
          ;; subtraction wraps around, clamp to 0; the probe loop below is
          ;; then expected to fault long before it gets there.
          xor     r11d, r11d              ; zeroes all of r11
          sub     r10, rax
          cmovb   r10, r11                ; CF set by the sub => wrapped

          ;; r11 = current stack limit. If the allocation ends at or above
          ;; it, every page involved is already accessible: nothing to probe.
          mov     r11, [gs:CHKSTK_STACK_LIMIT]
          cmp     r10, r11
          jae     .done

          ;; Round the allocation bottom down to a page boundary so that the
          ;; equality test in the loop can terminate. A full-width AND clears
          ;; the same low 12 bits as a 16-bit AND with 0xf000 would.
          and     r10, -CHKSTK_PAGE_SIZE

  .probe:
          ;; Step down one page and touch it by storing a zero byte at its
          ;; first address.
          lea     r11, [r11 - CHKSTK_PAGE_SIZE]
          mov     byte [r11], 0x0
          ;; Stop once the page holding the allocation bottom was touched.
          cmp     r10, r11
          jne     .probe

  .done:
          ;; Reload the caller's r10/r11, drop the spill area and return.
          mov     r10, [rsp]
          mov     r11, [rsp + 0x8]
          add     rsp, 0x10
          ret