modulefinder.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. """Find modules used by a script, using introspection."""
  2. import dis
  3. import imp
  4. import marshal
  5. import os
  6. import re
  7. import string
  8. import sys
  # win32api is only consulted on Windows, but the name is bound on every
  # platform so that a "win32api is not None" test can never hit a NameError.
  win32api = None
  if sys.platform == "win32":
      # On Windows, we can locate modules in the registry with
      # the help of the win32api package.
      try:
          import win32api
      except ImportError:
          print("The win32api module is not available - modules listed")
          print("in the registry will not be found.")

  # Opcode numbers of the two import instructions that scan_code looks for.
  IMPORT_NAME = dis.opname.index('IMPORT_NAME')
  IMPORT_FROM = dis.opname.index('IMPORT_FROM')

  # Modulefinder does a good job at simulating Python's import machinery, but
  # it cannot handle the __path__ modifications packages make at runtime.
  # Therefore there is a mechanism whereby you can register extra paths in
  # this map for a package, and they will be honoured.
  # Note this is a mapping from package names to lists of paths.
  packagePathMap = {}
  26. # A Public interface
  def AddPackagePath(packagename, path):
      """Register an extra directory to be searched for package packagename.

      The directory is appended to the list kept for that package in
      packagePathMap; the list is created on first use.
      """
      packagePathMap.setdefault(packagename, []).append(path)
  class Module:
      """Record of one module or package found during a scan.

      Attributes:
        __name__ -- the name the module was created with
        __file__ -- the file argument, or None
        __path__ -- the path argument, or None
        __code__ -- always None on creation; filled in by whoever loads the
                    module's code
      """

      def __init__(self, name, file=None, path=None):
          self.__name__ = name
          self.__file__ = file
          self.__path__ = path
          self.__code__ = None

      def __repr__(self):
          """Return "Module(name[, file][, path])", leaving out None fields."""
          fields = [repr(self.__name__)]
          if self.__file__ is not None:
              fields.append(repr(self.__file__))
          if self.__path__ is not None:
              fields.append(repr(self.__path__))
          return "Module(%s)" % ", ".join(fields)
  class ModuleFinder:
      """Find the modules a script uses by scanning its compiled code.

      Instance attributes:
        path       -- list of directories searched for top-level modules
        modules    -- maps fully qualified name -> Module for everything found
        badmodules -- maps a name that could not be imported -> dict whose
                      keys are the names of the modules that tried to import it
        debug      -- verbosity; a message is printed when its level <= debug
        indent     -- current nesting depth of the debug trace
        excludes   -- names that find_module refuses to locate
      """

      def __init__(self, path=None, debug=0, excludes=None):
          """Create a finder.

          path defaults to sys.path; excludes defaults to a new empty list.
          """
          if path is None:
              path = sys.path
          if excludes is None:
              # A fresh list per instance: a shared default list would leak
              # exclusions from one finder into the next.
              excludes = []
          self.path = path
          self.modules = {}
          self.badmodules = {}
          self.debug = debug
          self.indent = 0
          self.excludes = excludes

      def msg(self, level, str, *args):
          """Print a trace line if level <= self.debug.

          The line is one blank per indent level, then str, then the repr of
          each extra argument, all separated by single spaces.
          """
          if level <= self.debug:
              parts = [" "] * self.indent
              parts.append("%s" % (str,))
              for arg in args:
                  parts.append(repr(arg))
              print(" ".join(parts))

      def msgin(self, *args):
          """Like msg(), but first step the trace indent one level in."""
          level = args[0]
          if level <= self.debug:
              self.indent = self.indent + 1
              self.msg(*args)

      def msgout(self, *args):
          """Like msg(), but first step the trace indent one level out."""
          level = args[0]
          if level <= self.debug:
              self.indent = self.indent - 1
              self.msg(*args)

      def run_script(self, pathname):
          """Load the source file pathname as the module '__main__'."""
          self.msg(2, "run_script", pathname)
          fp = open(pathname)
          try:
              stuff = ("", "r", imp.PY_SOURCE)
              self.load_module('__main__', fp, pathname, stuff)
          finally:
              # The original never closed this file.
              fp.close()

      def load_file(self, pathname):
          """Load the source file pathname under its base name (no extension)."""
          dir, name = os.path.split(pathname)
          name, ext = os.path.splitext(name)
          fp = open(pathname)
          try:
              stuff = (ext, "r", imp.PY_SOURCE)
              self.load_module(name, fp, pathname, stuff)
          finally:
              # The original never closed this file.
              fp.close()

      def import_hook(self, name, caller=None, fromlist=None):
          """Simulate "import name" (or "from name import ...") done by caller.

          Returns the head package when fromlist is empty, otherwise None.
          Raises ImportError if any component of name cannot be found.
          """
          self.msg(3, "import_hook", name, caller, fromlist)
          parent = self.determine_parent(caller)
          q, tail = self.find_head_package(parent, name)
          m = self.load_tail(q, tail)
          if not fromlist:
              return q
          if m.__path__:
              self.ensure_fromlist(m, fromlist)
          return None

      def determine_parent(self, caller):
          """Return the package in which caller's imports are first tried.

          That is caller itself when it has a __path__, the module named by
          everything before the last dot when caller's name is dotted, and
          None otherwise (including when caller is false).
          """
          self.msgin(4, "determine_parent", caller)
          if not caller:
              self.msgout(4, "determine_parent -> None")
              return None
          pname = caller.__name__
          if caller.__path__:
              parent = self.modules[pname]
              assert caller is parent
              self.msgout(4, "determine_parent ->", parent)
              return parent
          if '.' in pname:
              pname = pname[:pname.rfind('.')]
              parent = self.modules[pname]
              assert parent.__name__ == pname
              self.msgout(4, "determine_parent ->", parent)
              return parent
          self.msgout(4, "determine_parent -> None")
          return None

      def find_head_package(self, parent, name):
          """Import the first dotted component of name.

          The component is tried inside parent first (if there is one) and
          then as a top-level module.  Returns (module, rest_of_name); raises
          ImportError if neither attempt succeeds.
          """
          self.msgin(4, "find_head_package", parent, name)
          dot = name.find('.')
          if dot >= 0:
              head, tail = name[:dot], name[dot+1:]
          else:
              head, tail = name, ""
          if parent:
              qname = "%s.%s" % (parent.__name__, head)
          else:
              qname = head
          q = self.import_module(head, qname, parent)
          if q:
              self.msgout(4, "find_head_package ->", (q, tail))
              return q, tail
          if parent:
              # Not found relative to the parent: retry as a top-level name.
              qname = head
              parent = None
              q = self.import_module(head, qname, parent)
              if q:
                  self.msgout(4, "find_head_package ->", (q, tail))
                  return q, tail
          self.msgout(4, "raise ImportError: No module named", qname)
          raise ImportError("No module named " + qname)

      def load_tail(self, q, tail):
          """Import each remaining dotted component of tail beneath q.

          Returns the last module imported (q itself when tail is empty);
          raises ImportError at the first component that cannot be found.
          """
          self.msgin(4, "load_tail", q, tail)
          m = q
          while tail:
              i = tail.find('.')
              if i < 0:
                  i = len(tail)
              head, tail = tail[:i], tail[i+1:]
              mname = "%s.%s" % (m.__name__, head)
              m = self.import_module(head, mname, m)
              if not m:
                  self.msgout(4, "raise ImportError: No module named", mname)
                  raise ImportError("No module named " + mname)
          self.msgout(4, "load_tail ->", m)
          return m

      def ensure_fromlist(self, m, fromlist, recursive=0):
          """Import every name in fromlist that is not yet an attribute of m.

          A "*" entry expands to all submodules found on disk; the recursive
          flag stops that expansion from being applied a second time.
          Raises ImportError for a name that cannot be imported.
          """
          self.msg(4, "ensure_fromlist", m, fromlist, recursive)
          for sub in fromlist:
              if sub == "*":
                  if not recursive:
                      submodules = self.find_all_submodules(m)
                      if submodules:
                          self.ensure_fromlist(m, submodules, 1)
              elif not hasattr(m, sub):
                  subname = "%s.%s" % (m.__name__, sub)
                  submod = self.import_module(sub, subname, m)
                  if not submod:
                      raise ImportError("No module named " + subname)

      def find_all_submodules(self, m):
          """Return the names of the .py/.pyc/.pyo files in m's directories.

          "__init__" is left out and duplicates are collapsed.  Returns None
          when m has no __path__.  Directories that cannot be listed are
          reported at debug level 2 and skipped.
          """
          if not m.__path__:
              return None
          modules = {}
          suffixes = [".py", ".pyc", ".pyo"]
          for dir in m.__path__:
              try:
                  names = os.listdir(dir)
              except os.error:
                  self.msg(2, "can't list directory", dir)
                  continue
              for name in names:
                  mod = None
                  for suff in suffixes:
                      n = len(suff)
                      if name[-n:] == suff:
                          mod = name[:-n]
                          break
                  if mod and mod != "__init__":
                      modules[mod] = mod
          return modules.keys()

      def import_module(self, partname, fqname, parent):
          """Return the Module for fqname, loading it if necessary.

          partname is the last component of fqname and parent the package it
          is looked up in (or None for a top-level lookup).  Returns None,
          rather than raising, when the module cannot be found.
          """
          self.msgin(3, "import_module", partname, fqname, parent)
          try:
              m = self.modules[fqname]
          except KeyError:
              pass
          else:
              self.msgout(3, "import_module ->", m)
              return m
          if fqname in self.badmodules:
              self.msgout(3, "import_module -> None")
              # parent is None for top-level lookups; the original then
              # crashed with AttributeError on parent.__name__.
              if parent:
                  self.badmodules[fqname][parent.__name__] = None
              return None
          try:
              fp, pathname, stuff = self.find_module(partname,
                                                     parent and parent.__path__)
          except ImportError:
              self.msgout(3, "import_module ->", None)
              return None
          try:
              m = self.load_module(fqname, fp, pathname, stuff)
          finally:
              if fp:
                  fp.close()
          if parent:
              setattr(parent, partname, m)
          self.msgout(3, "import_module ->", m)
          return m

      def load_module(self, fqname, fp, pathname, file_info):
          """Create the Module for fqname and scan its code for imports.

          file_info is the (suffix, mode, type) triple as returned by
          find_module; it must still be passed positionally, exactly as the
          former tuple parameter was.  Raises ImportError for a compiled file
          whose magic number does not match imp.get_magic().
          """
          suffix, mode, kind = file_info
          self.msgin(2, "load_module", fqname, fp and "fp", pathname)
          if kind == imp.PKG_DIRECTORY:
              m = self.load_package(fqname, pathname)
              self.msgout(2, "load_module ->", m)
              return m
          if kind == imp.PY_SOURCE:
              co = compile(fp.read()+'\n', pathname, 'exec')
          elif kind == imp.PY_COMPILED:
              if fp.read(4) != imp.get_magic():
                  self.msgout(2, "raise ImportError: Bad magic number", pathname)
                  # The original wrote 'raise ImportError, "...%s", pathname',
                  # which hands pathname to raise as the traceback argument
                  # instead of formatting it into the message.
                  raise ImportError("Bad magic number in %s" % pathname)
              # Skip the four header bytes that follow the magic number.
              fp.read(4)
              co = marshal.load(fp)
          else:
              co = None
          m = self.add_module(fqname)
          m.__file__ = pathname
          if co:
              m.__code__ = co
              self.scan_code(co, m)
          self.msgout(2, "load_module ->", m)
          return m

      def _add_badmodule(self, name, importer):
          """Record that the module importer failed to import name."""
          self.badmodules.setdefault(name, {})[importer.__name__] = None

      def scan_code(self, co, m):
          """Walk code object co and simulate every import it performs for m.

          The byte string is decoded as a one-byte opcode followed, for
          opcodes >= dis.HAVE_ARGUMENT, by a two-byte little-endian argument.
          Nested code objects found in co_consts are scanned recursively.
          Failed imports are recorded in self.badmodules instead of raised.

          NOTE(review): an IMPORT_FROM is only accepted while lastname is
          still set, i.e. with no other opcode since the IMPORT_NAME --
          confirm this matches the bytecode of the interpreter in use.
          """
          code = co.co_code
          n = len(code)
          i = 0
          lastname = None
          while i < n:
              c = code[i]
              i = i+1
              op = ord(c)
              if op >= dis.HAVE_ARGUMENT:
                  oparg = ord(code[i]) + ord(code[i+1])*256
                  i = i+2
              if op == IMPORT_NAME:
                  name = lastname = co.co_names[oparg]
                  if lastname not in self.badmodules:
                      try:
                          self.import_hook(name, m)
                      except ImportError as msg:
                          self.msg(2, "ImportError:", str(msg))
                          self._add_badmodule(name, m)
              elif op == IMPORT_FROM:
                  name = co.co_names[oparg]
                  assert lastname is not None
                  if lastname not in self.badmodules:
                      try:
                          self.import_hook(lastname, m, [name])
                      except ImportError as msg:
                          self.msg(2, "ImportError:", str(msg))
                          self._add_badmodule(lastname + "." + name, m)
              else:
                  lastname = None
          for c in co.co_consts:
              if isinstance(c, type(co)):
                  self.scan_code(c, m)

      def load_package(self, fqname, pathname):
          """Create the Module for package fqname and load its __init__.

          The package's __path__ is the directory pathname plus any extra
          directories registered for fqname in packagePathMap.
          """
          self.msgin(2, "load_package", fqname, pathname)
          m = self.add_module(fqname)
          m.__file__ = pathname
          # As per comment at top of file, simulate runtime __path__ additions.
          m.__path__ = [pathname] + packagePathMap.get(fqname, [])
          fp, buf, stuff = self.find_module("__init__", m.__path__)
          try:
              self.load_module(fqname, fp, buf, stuff)
          finally:
              # The original left the __init__ file open.
              if fp:
                  fp.close()
          self.msgout(2, "load_package ->", m)
          return m

      def add_module(self, fqname):
          """Return the Module registered as fqname, creating it if needed."""
          if fqname in self.modules:
              return self.modules[fqname]
          self.modules[fqname] = m = Module(fqname)
          return m

      def find_module(self, name, path):
          """Locate module name and return (file, pathname, description).

          path is the list of directories to search; None means a top-level
          lookup, which checks builtin modules, then (on Windows, when
          win32api is available) the registry, then self.path.  Raises
          ImportError if name is excluded or imp.find_module cannot find it.
          """
          if name in self.excludes:
              # Plain msg(), not msgout(): no msgin() was issued here, so
              # stepping the indent out would unbalance the caller's trace.
              self.msg(3, "find_module -> Excluded")
              raise ImportError(name)
          if path is None:
              if name in sys.builtin_module_names:
                  return (None, None, ("", "", imp.C_BUILTIN))
              # Emulate the Registered Module support on Windows.
              if sys.platform == "win32" and win32api is not None:
                  HKEY_LOCAL_MACHINE = 0x80000002
                  try:
                      pathname = win32api.RegQueryValue(HKEY_LOCAL_MACHINE, "Software\\Python\\PythonCore\\%s\\Modules\\%s" % (sys.winver, name))
                      fp = open(pathname, "rb")
                      # XXX - To do - remove the hard code of C_EXTENSION.
                      stuff = "", "rb", imp.C_EXTENSION
                      return fp, pathname, stuff
                  except win32api.error:
                      pass
              path = self.path
          return imp.find_module(name, path)

      def report(self):
          """Print a table of the modules found, then the ones missing.

          Found entries are flagged "P" when they have a __path__ and "m"
          otherwise.  Missing names are listed with the modules that tried to
          import them, except names that appear in self.excludes.
          """
          print("")
          print(" %-25s %s" % ("Name", "File"))
          print(" %-25s %s" % ("----", "----"))
          # Print modules found
          keys = list(self.modules.keys())
          keys.sort()
          for key in keys:
              m = self.modules[key]
              if m.__path__:
                  flag = "P"
              else:
                  flag = "m"
              print("%s %-25s %s" % (flag, key, m.__file__ or ""))
          # Print missing modules
          keys = list(self.badmodules.keys())
          keys.sort()
          for key in keys:
              # ... but not if they were explicitly excluded.
              if key not in self.excludes:
                  mods = list(self.badmodules[key].keys())
                  mods.sort()
                  print("? %s from %s" % (key, ', '.join(mods)))
  def test():
      """Command-line driver: scan a script and print the module report.

      Options:
        -d        increase the debug level (starts at 1)
        -m        treat the remaining extra arguments as module names
        -p PATHS  prepend the os.pathsep-separated PATHS to the search path
        -q        set the debug level to 0
        -x NAME   exclude module NAME (may be repeated)

      The first positional argument is the script (default "hello.py").
      Further arguments are loaded as files, or imported as modules once -m
      has been seen; "pkg.*" imports every submodule of pkg.
      """
      # Parse command line
      import getopt
      try:
          opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
      except getopt.error as msg:
          print(msg)
          return

      # Process options
      debug = 1
      domods = 0
      addpath = []
      exclude = []
      for o, a in opts:
          if o == '-d':
              debug = debug + 1
          if o == '-m':
              domods = 1
          if o == '-p':
              addpath = addpath + a.split(os.pathsep)
          if o == '-q':
              debug = 0
          if o == '-x':
              exclude.append(a)

      # Provide default arguments
      if not args:
          script = "hello.py"
      else:
          script = args[0]

      # Set the path based on sys.path and the script directory
      path = sys.path[:]
      path[0] = os.path.dirname(script)
      path = addpath + path
      if debug > 1:
          print("path:")
          for item in path:
              print("  %r" % (item,))

      # Create the module finder and turn its crank
      mf = ModuleFinder(path, debug, exclude)
      for arg in args[1:]:
          if arg == '-m':
              domods = 1
              continue
          if domods:
              if arg[-2:] == '.*':
                  mf.import_hook(arg[:-2], None, ["*"])
              else:
                  mf.import_hook(arg)
          else:
              mf.load_file(arg)
      mf.run_script(script)
      mf.report()
  # Script entry point: run the command-line driver, and turn Ctrl-C into a
  # short notice instead of a traceback.
  if __name__ == '__main__':
      try:
          test()
      except KeyboardInterrupt:
          print("\n[interrupt]")