detect.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. import os
  2. import json
  3. import socket
  4. import urllib.request
  5. from typing import Dict, Any
  6. from pathlib import Path
  7. import subprocess
  8. import platform
  9. import tempfile
  10. from datetime import datetime
  11. import psutil
  12. import machineid # https://github.com/keygen-sh/py-machineid
  13. from rich import print
  14. PACKAGE_DIR = Path(__file__).parent
  15. DATA_DIR = Path(os.getcwd()).resolve()
  16. def get_vm_info():
  17. hw_in_docker = bool(os.getenv('IN_DOCKER', False) in ('1', 'true', 'True', 'TRUE'))
  18. hw_in_vm = False
  19. try:
  20. # check for traces of docker/containerd/podman in cgroup
  21. with open('/proc/self/cgroup', 'r') as procfile:
  22. for line in procfile:
  23. cgroup = line.strip() # .split('/', 1)[-1].lower()
  24. if 'docker' in cgroup or 'containerd' in cgroup or 'podman' in cgroup:
  25. hw_in_docker = True
  26. except Exception:
  27. pass
  28. hw_manufacturer = 'Docker' if hw_in_docker else 'Unknown'
  29. hw_product = 'Container' if hw_in_docker else 'Unknown'
  30. hw_uuid = machineid.id()
  31. if platform.system().lower() == 'darwin':
  32. # Get macOS machine info
  33. hw_manufacturer = 'Apple'
  34. hw_product = 'Mac'
  35. try:
  36. # Hardware:
  37. # Hardware Overview:
  38. # Model Name: Mac Studio
  39. # Model Identifier: Mac13,1
  40. # Model Number: MJMV3LL/A
  41. # ...
  42. # Serial Number (system): M230YYTD77
  43. # Hardware UUID: 39A12B50-1972-5910-8BEE-235AD20C8EE3
  44. # ...
  45. result = subprocess.run(['system_profiler', 'SPHardwareDataType'], capture_output=True, text=True, check=True)
  46. for line in result.stdout.split('\n'):
  47. if 'Model Name:' in line:
  48. hw_product = line.split(':', 1)[-1].strip()
  49. elif 'Model Identifier:' in line:
  50. hw_product += ' ' + line.split(':', 1)[-1].strip()
  51. elif 'Hardware UUID:' in line:
  52. hw_uuid = line.split(':', 1)[-1].strip()
  53. except Exception:
  54. pass
  55. else:
  56. # get Linux machine info
  57. try:
  58. # Getting SMBIOS data from sysfs.
  59. # SMBIOS 2.8 present.
  60. # argo-1 | 2024-10-01T10:40:51Z ERR error="Incoming request ended abruptly: context canceled" connIndex=2 event=1 ingressRule=0 originService=http://archivebox:8000 │
  61. # Handle 0x0100, DMI type 1, 27 bytes
  62. # System Information
  63. # Manufacturer: DigitalOcean
  64. # Product Name: Droplet
  65. # Serial Number: 411922099
  66. # UUID: fb65f41c-ec24-4539-beaf-f941903bdb2c
  67. # ...
  68. # Family: DigitalOcean_Droplet
  69. dmidecode = subprocess.run(['dmidecode', '-t', 'system'], capture_output=True, text=True, check=True)
  70. for line in dmidecode.stdout.split('\n'):
  71. if 'Manufacturer:' in line:
  72. hw_manufacturer = line.split(':', 1)[-1].strip()
  73. elif 'Product Name:' in line:
  74. hw_product = line.split(':', 1)[-1].strip()
  75. elif 'UUID:' in line:
  76. hw_uuid = line.split(':', 1)[-1].strip()
  77. except Exception:
  78. pass
  79. # Check for VM fingerprint in manufacturer/product name
  80. if 'qemu' in hw_product.lower() or 'vbox' in hw_product.lower() or 'lxc' in hw_product.lower() or 'vm' in hw_product.lower():
  81. hw_in_vm = True
  82. # Check for QEMU explicitly in pmap output
  83. try:
  84. result = subprocess.run(['pmap', '1'], capture_output=True, text=True, check=True)
  85. if 'qemu' in result.stdout.lower():
  86. hw_in_vm = True
  87. except Exception:
  88. pass
  89. return {
  90. "hw_in_docker": hw_in_docker,
  91. "hw_in_vm": hw_in_vm,
  92. "hw_manufacturer": hw_manufacturer,
  93. "hw_product": hw_product,
  94. "hw_uuid": hw_uuid,
  95. }
  96. def get_public_ip() -> str:
  97. def fetch_url(url: str) -> str:
  98. with urllib.request.urlopen(url, timeout=5) as response:
  99. return response.read().decode('utf-8').strip()
  100. def fetch_dns(pubip_lookup_host: str) -> str:
  101. return socket.gethostbyname(pubip_lookup_host).strip()
  102. methods = [
  103. (lambda: fetch_url("https://ipinfo.io/ip"), lambda r: r),
  104. (lambda: fetch_url("https://api.ipify.org?format=json"), lambda r: json.loads(r)['ip']),
  105. (lambda: fetch_dns("myip.opendns.com"), lambda r: r),
  106. (lambda: fetch_url("http://whatismyip.akamai.com/"), lambda r: r), # try HTTP as final fallback in case of TLS/system time errors
  107. ]
  108. for fetch, parse in methods:
  109. try:
  110. result = parse(fetch())
  111. if result:
  112. return result
  113. except Exception:
  114. continue
  115. raise Exception("Could not determine public IP address")
  116. def get_local_ip(remote_ip: str='1.1.1.1', remote_port: int=80) -> str:
  117. try:
  118. with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
  119. s.connect((remote_ip, remote_port))
  120. return s.getsockname()[0]
  121. except Exception:
  122. pass
  123. return '127.0.0.1'
  124. ip_addrs = lambda addrs: (a for a in addrs if a.family == socket.AF_INET)
  125. mac_addrs = lambda addrs: (a for a in addrs if a.family == psutil.AF_LINK)
  126. def get_isp_info(ip=None):
  127. # Get public IP
  128. try:
  129. ip = ip or urllib.request.urlopen('https://api.ipify.org').read().decode('utf8')
  130. except Exception:
  131. pass
  132. # Get ISP name, city, and country
  133. data = {}
  134. try:
  135. url = f'https://ipapi.co/{ip}/json/'
  136. response = urllib.request.urlopen(url)
  137. data = json.loads(response.read().decode())
  138. except Exception:
  139. pass
  140. isp = data.get('org', 'Unknown')
  141. city = data.get('city', 'Unknown')
  142. region = data.get('region', 'Unknown')
  143. country = data.get('country_name', 'Unknown')
  144. # Get system DNS resolver servers
  145. dns_server = None
  146. try:
  147. result = subprocess.run(['dig', 'example.com', 'A'], capture_output=True, text=True, check=True).stdout
  148. dns_server = result.split(';; SERVER: ', 1)[-1].split('\n')[0].split('#')[0].strip()
  149. except Exception:
  150. try:
  151. dns_server = Path('/etc/resolv.conf').read_text().split('nameserver ', 1)[-1].split('\n')[0].strip()
  152. except Exception:
  153. dns_server = '127.0.0.1'
  154. print(f'[red]:warning: WARNING: Could not determine DNS server, using {dns_server}[/red]')
  155. # Get DNS resolver's ISP name
  156. # url = f'https://ipapi.co/{dns_server}/json/'
  157. # dns_isp = json.loads(urllib.request.urlopen(url).read().decode()).get('org', 'Unknown')
  158. return {
  159. 'isp': isp,
  160. 'city': city,
  161. 'region': region,
  162. 'country': country,
  163. 'dns_server': dns_server,
  164. # 'net_dns_isp': dns_isp,
  165. }
  166. def get_host_network() -> Dict[str, Any]:
  167. default_gateway_local_ip = get_local_ip()
  168. gateways = psutil.net_if_addrs()
  169. for interface, ips in gateways.items():
  170. for local_ip in ip_addrs(ips):
  171. if default_gateway_local_ip == local_ip.address:
  172. mac_address = next(mac_addrs(ips)).address
  173. public_ip = get_public_ip()
  174. return {
  175. "hostname": max([socket.gethostname(), platform.node()], key=len),
  176. "iface": interface,
  177. "mac_address": mac_address,
  178. "ip_local": local_ip.address,
  179. "ip_public": public_ip,
  180. # "is_behind_nat": local_ip.address != public_ip,
  181. **get_isp_info(public_ip),
  182. }
  183. raise Exception("Could not determine host network info")
  184. def get_os_info() -> Dict[str, Any]:
  185. os_release = platform.release()
  186. if platform.system().lower() == 'darwin':
  187. os_release = 'macOS ' + platform.mac_ver()[0]
  188. else:
  189. try:
  190. os_release = subprocess.run(['lsb_release', '-ds'], capture_output=True, text=True, check=True).stdout.strip()
  191. except Exception:
  192. pass
  193. return {
  194. "os_arch": platform.machine(),
  195. "os_family": platform.system().lower(),
  196. "os_platform": platform.platform(),
  197. "os_kernel": platform.version(),
  198. "os_release": os_release,
  199. }
  200. def get_host_stats() -> Dict[str, Any]:
  201. with tempfile.TemporaryDirectory() as tmp_dir:
  202. tmp_usage = psutil.disk_usage(str(tmp_dir))
  203. app_usage = psutil.disk_usage(str(PACKAGE_DIR))
  204. data_usage = psutil.disk_usage(str(DATA_DIR))
  205. mem_usage = psutil.virtual_memory()
  206. swap_usage = psutil.swap_memory()
  207. return {
  208. "cpu_boot_time": datetime.fromtimestamp(psutil.boot_time()).isoformat(),
  209. "cpu_count": psutil.cpu_count(logical=False),
  210. "cpu_load": psutil.getloadavg(),
  211. # "cpu_pct": psutil.cpu_percent(interval=1),
  212. "mem_virt_used_pct": mem_usage.percent,
  213. "mem_virt_used_gb": round(mem_usage.used / 1024 / 1024 / 1024, 3),
  214. "mem_virt_free_gb": round(mem_usage.free / 1024 / 1024 / 1024, 3),
  215. "mem_swap_used_pct": swap_usage.percent,
  216. "mem_swap_used_gb": round(swap_usage.used / 1024 / 1024 / 1024, 3),
  217. "mem_swap_free_gb": round(swap_usage.free / 1024 / 1024 / 1024, 3),
  218. "disk_tmp_used_pct": tmp_usage.percent,
  219. "disk_tmp_used_gb": round(tmp_usage.used / 1024 / 1024 / 1024, 3),
  220. "disk_tmp_free_gb": round(tmp_usage.free / 1024 / 1024 / 1024, 3), # in GB
  221. "disk_app_used_pct": app_usage.percent,
  222. "disk_app_used_gb": round(app_usage.used / 1024 / 1024 / 1024, 3),
  223. "disk_app_free_gb": round(app_usage.free / 1024 / 1024 / 1024, 3),
  224. "disk_data_used_pct": data_usage.percent,
  225. "disk_data_used_gb": round(data_usage.used / 1024 / 1024 / 1024, 3),
  226. "disk_data_free_gb": round(data_usage.free / 1024 / 1024 / 1024, 3),
  227. }
  228. def get_host_immutable_info(host_info: Dict[str, Any]) -> Dict[str, Any]:
  229. return {
  230. key: value
  231. for key, value in host_info.items()
  232. if key in ['guid', 'net_mac', 'os_family', 'cpu_arch']
  233. }
  234. def get_host_guid() -> str:
  235. return machineid.hashed_id('archivebox')
  236. # Example usage
  237. if __name__ == "__main__":
  238. host_info = {
  239. 'guid': get_host_guid(),
  240. 'os': get_os_info(),
  241. 'vm': get_vm_info(),
  242. 'net': get_host_network(),
  243. 'stats': get_host_stats(),
  244. }
  245. print(host_info)
  246. # {
  247. # 'guid': '1cd2dd279f8a854...6943f2384437991a',
  248. # 'os': {
  249. # 'os_arch': 'arm64',
  250. # 'os_family': 'darwin',
  251. # 'os_platform': 'macOS-14.6.1-arm64-arm-64bit',
  252. # 'os_kernel': 'Darwin Kernel Version 23.6.0: Mon Jul 29 21:14:30 PDT 2024; root:xnu-10063.141.2~1/RELEASE_ARM64_T6000',
  253. # 'os_release': 'macOS 14.6.1'
  254. # },
  255. # 'vm': {'hw_in_docker': False, 'hw_in_vm': False, 'hw_manufacturer': 'Apple', 'hw_product': 'Mac Studio Mac13,1', 'hw_uuid': '39A12B50-...-...-...-...'},
  256. # 'net': {
  257. # 'hostname': 'somehost.sub.example.com',
  258. # 'iface': 'en0',
  259. # 'mac_address': 'ab:cd:ef:12:34:56',
  260. # 'ip_local': '192.168.2.18',
  261. # 'ip_public': '123.123.123.123',
  262. # 'isp': 'AS-SONICTELECOM',
  263. # 'city': 'Berkeley',
  264. # 'region': 'California',
  265. # 'country': 'United States',
  266. # 'dns_server': '192.168.1.1'
  267. # },
  268. # 'stats': {
  269. # 'cpu_boot_time': '2024-09-24T21:20:16',
  270. # 'cpu_count': 10,
  271. # 'cpu_load': (2.35693359375, 4.013671875, 4.1171875),
  272. # 'mem_virt_used_pct': 66.0,
  273. # 'mem_virt_used_gb': 15.109,
  274. # 'mem_virt_free_gb': 0.065,
  275. # 'mem_swap_used_pct': 89.4,
  276. # 'mem_swap_used_gb': 8.045,
  277. # 'mem_swap_free_gb': 0.955,
  278. # 'disk_tmp_used_pct': 26.0,
  279. # 'disk_tmp_used_gb': 113.1,
  280. # 'disk_tmp_free_gb': 322.028,
  281. # 'disk_app_used_pct': 56.1,
  282. # 'disk_app_used_gb': 2138.796,
  283. # 'disk_app_free_gb': 1675.996,
  284. # 'disk_data_used_pct': 56.1,
  285. # 'disk_data_used_gb': 2138.796,
  286. # 'disk_data_free_gb': 1675.996
  287. # }
  288. # }