dbtextdb.py 41 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243
  1. #!/usr/bin/python3
  2. #
  3. # Copyright 2008 Google Inc. All Rights Reserved.
  4. """SQL-like access layer for dbtext.
  5. This module provides the glue for kamctl to interact with dbtext files
  6. using basic SQL syntax thus avoiding special case handling of dbtext.
  7. """
  8. import fcntl
  9. import os
  10. import shutil
  11. import sys
  12. import tempfile
  13. import time
  14. __author__ = '[email protected] (Herman Sheremetyev)'
  15. if 'DBTEXTDB_DEBUG' in os.environ:
  16. DEBUG = os.environ['DBTEXTDB_DEBUG']
  17. else:
  18. DEBUG = 0
  19. def Debug(msg):
  20. """Debug print method."""
  21. if DEBUG:
  22. print(msg)
  23. class DBText(object):
  24. """Provides connection to a dbtext database."""
  25. RESERVED_WORDS = ['SELECT', 'DELETE', 'UPDATE', 'INSERT', 'SET',
  26. 'VALUES', 'INTO', 'FROM', 'ORDER', 'BY', 'WHERE',
  27. 'COUNT', 'CONCAT', 'AND', 'AS']
  28. ALL_COMMANDS = ['SELECT', 'DELETE', 'UPDATE', 'INSERT']
  29. WHERE_COMMANDS = ['SELECT', 'DELETE', 'UPDATE']
  30. def __init__(self, location):
  31. self.location = location # location of dbtext tables
  32. self.tokens = [] # query broken up into tokens
  33. self.conditions = {} # args to the WHERE clause
  34. self.columns = [] # columns requested by SELECT
  35. self.table = '' # name of the table being queried
  36. self.header = {} # table header
  37. self.orig_data = [] # original table data used to diff after updates
  38. self.data = [] # table data as a list of dicts
  39. self.count = False # where or not using COUNT()
  40. self.aliases = {} # column aliases (SELECT AS)
  41. self.targets = {} # target columns-value pairs for INSERT/UPDATE
  42. self.args = '' # query arguments preceding the ;
  43. self.command = '' # which command are we executing
  44. self.strings = [] # list of string literals parsed from the query
  45. self.parens = [] # list of parentheses parsed from the query
  46. self._str_placeholder = '__DBTEXTDB_PARSED_OUT_STRING__'
  47. self._paren_placeholder = '__DBTEXTDB_PARSED_OUT_PARENS__'
  48. if not os.path.isdir(location):
  49. raise ParseError(location + ' is not a directory')
  50. def __del__(self):
  51. if getattr(self, 'fd', False):
  52. self.fd.close()
  53. def _ParseOrderBy(self):
  54. """Parse out the column name to be used for ordering the dataset.
  55. Raises:
  56. ParseError: Invalid ORDER BY clause
  57. """
  58. self.order_by = ''
  59. if 'ORDER' in self.tokens:
  60. order_index = self.tokens.index('ORDER')
  61. if order_index != len(self.tokens) - 3:
  62. raise ParseError('ORDER must be followed with BY and column '
  63. 'name')
  64. if self.tokens[order_index + 1] != 'BY':
  65. raise ParseError('ORDER must be followed with BY')
  66. self.order_by = self.tokens[order_index + 2]
  67. # strip off the order by stuff
  68. self.tokens.pop() # column name
  69. self.tokens.pop() # BY
  70. self.tokens.pop() # ORDER
  71. elif 'BY' in self.tokens:
  72. raise ParseError('BY must be preceded by ORDER')
  73. Debug('Order by: ' + self.order_by)
  74. def _ParseConditions(self):
  75. """Parse out WHERE clause.
  76. Take everything after the WHERE keyword and convert it to a dict of
  77. name value pairs corresponding to the columns and their values that
  78. should be matched.
  79. Raises:
  80. ParseError: Invalid WHERE clause
  81. NotSupportedError: Unsupported syntax
  82. """
  83. self.conditions = {}
  84. Debug('self.tokens = %s' % self.tokens)
  85. if 'WHERE' not in self.tokens:
  86. return
  87. if self.command not in self.WHERE_COMMANDS:
  88. raise ParseError(self.command + ' cannot have a WHERE clause')
  89. if 'OR' in self.tokens:
  90. raise NotSupportedError('WHERE clause does not support OR '
  91. 'operator')
  92. where_clause = self.tokens[self.tokens.index('WHERE') + 1:]
  93. self.conditions = self._ParsePairs(' '.join(where_clause), 'AND')
  94. for cond in self.conditions:
  95. self.conditions[cond] = self._EscapeChars(self.conditions[cond])
  96. Debug('Conditions are [%s]' % self.conditions)
  97. # pop off where clause
  98. a = self.tokens.pop()
  99. while a != 'WHERE':
  100. a = self.tokens.pop()
  101. Debug('self.tokens: %s' % self.tokens)
  102. def _ParseColumns(self):
  103. """Parse out the columns that need to be selected.
  104. Raises:
  105. ParseError: Invalid SELECT syntax
  106. """
  107. self.columns = []
  108. self.count = False
  109. self.aliases = {}
  110. col_end = 0
  111. # this is only valid for SELECT
  112. if self.command != 'SELECT':
  113. return
  114. if 'FROM' not in self.tokens:
  115. raise ParseError('SELECT must be followed by FROM')
  116. col_end = self.tokens.index('FROM')
  117. if not col_end: # col_end == 0
  118. raise ParseError('SELECT must be followed by column name[s]')
  119. cols_str = ' '.join(self.tokens[0:col_end])
  120. # check if there is a function modifier on the columns
  121. if self.tokens[0] == 'COUNT':
  122. self.count = True
  123. if col_end == 1:
  124. raise ParseError('COUNT must be followed by column name[s]')
  125. if not self.tokens[1].startswith(self._paren_placeholder):
  126. raise ParseError('COUNT must be followed by ()')
  127. cols_str = self._ReplaceParens(self.tokens[1])
  128. cols = cols_str.split(',')
  129. for col in cols:
  130. if not col.strip():
  131. raise ParseError('Extra comma in columns')
  132. col_split = col.split()
  133. if col_split[0] == 'CONCAT':
  134. # found a concat statement, do the same overall steps
  135. # for those cols
  136. self._ParseColumnsConcatHelper(col_split)
  137. else:
  138. col_split = col.split()
  139. if len(col_split) > 2 and col_split[1] != 'AS':
  140. raise ParseError('multiple columns must be separated '
  141. 'by a comma')
  142. elif len(col_split) == 3:
  143. if col_split[1] != 'AS':
  144. raise ParseError('Invalid column alias, use AS')
  145. my_key = self._ReplaceStringLiterals(col_split[2],
  146. quotes=True)
  147. my_val = self._ReplaceStringLiterals(col_split[0],
  148. quotes=True)
  149. self.aliases[my_key] = [my_val]
  150. self.columns.append(my_key)
  151. elif len(col_split) > 3:
  152. raise ParseError('multiple columns must be separated by '
  153. 'a comma')
  154. elif len(col_split) == 2: # alias
  155. my_key = self._ReplaceStringLiterals(col_split[1],
  156. quotes=True)
  157. my_val = self._ReplaceStringLiterals(col_split[0],
  158. quotes=True)
  159. self.aliases[my_key] = [my_val]
  160. self.columns.append(my_key)
  161. else:
  162. col = self._ReplaceStringLiterals(col, quotes=True).strip()
  163. if not col: # col == ''
  164. raise ParseError('empty column name not allowed')
  165. self.columns.append(col)
  166. # pop off all the columns related junk
  167. self.tokens = self.tokens[col_end + 1:]
  168. Debug('Columns: %s' % self.columns)
  169. Debug('Aliases: %s' % self.aliases)
  170. Debug('self.tokens: %s' % self.tokens)
  171. def _ParseColumnsConcatHelper(self, col_split):
  172. """Handles the columns being CONCAT'd together.
  173. Args:
  174. col_split: ['column', 'column']
  175. Raises:
  176. ParseError: invalid CONCAT()
  177. """
  178. concat_placeholder = '_'
  179. split_len = len(col_split)
  180. if split_len == 1:
  181. raise ParseError('CONCAT() must be followed by column name[s]')
  182. if not col_split[1].startswith(self._paren_placeholder):
  183. raise ParseError('CONCAT must be followed by ()')
  184. if split_len > 2:
  185. if split_len == 4 and col_split[2] != 'AS':
  186. raise ParseError('CONCAT() must be followed by an AS clause')
  187. if split_len > 5:
  188. raise ParseError('CONCAT() AS clause takes exactly 1 arg. '
  189. 'Extra args: [%s]' % (col_split[4:]))
  190. else:
  191. concat_placeholder = self._ReplaceStringLiterals(col_split[-1],
  192. quotes=True)
  193. # make sure this place hodler is unique
  194. while concat_placeholder in self.aliases:
  195. concat_placeholder += '_'
  196. concat_cols_str = self._ReplaceParens(col_split[1])
  197. concat_cols = concat_cols_str.split(',')
  198. concat_col_list = []
  199. for concat_col in concat_cols:
  200. if ' ' in concat_col.strip():
  201. raise ParseError('multiple columns must be separated by a '
  202. 'comma inside CONCAT()')
  203. concat_col = self._ReplaceStringLiterals(concat_col,
  204. quotes=True).strip()
  205. if not concat_col:
  206. raise ParseError('Attempting to CONCAT empty set')
  207. concat_col_list.append(concat_col)
  208. self.aliases[concat_placeholder] = concat_col_list
  209. self.columns.append(concat_placeholder)
  210. def _ParseTable(self):
  211. """Parse out the table name (multiple table names not supported).
  212. Raises:
  213. ParseError: Unable to parse table name
  214. """
  215. table_name = ''
  216. if (not self.tokens or # len == 0
  217. (self.tokens[0] in self.RESERVED_WORDS and
  218. self.tokens[0] not in ['FROM', 'INTO'])):
  219. raise ParseError('Missing table name')
  220. # SELECT
  221. if self.command == 'SELECT':
  222. table_name = self.tokens.pop(0)
  223. # INSERT
  224. elif self.command == 'INSERT':
  225. table_name = self.tokens.pop(0)
  226. if table_name == 'INTO':
  227. table_name = self.tokens.pop(0)
  228. # DELETE
  229. elif self.command == 'DELETE':
  230. if self.tokens[0] != 'FROM':
  231. raise ParseError('DELETE command must be followed by FROM')
  232. self.tokens.pop(0) # FROM
  233. table_name = self.tokens.pop(0)
  234. # UPDATE
  235. elif self.command == 'UPDATE':
  236. table_name = self.tokens.pop(0)
  237. if not self.table:
  238. self.table = table_name
  239. else:
  240. # multiple queries detected, make sure they're against same table
  241. if self.table != table_name:
  242. raise ParseError('Table changed between queries! %s -> %s' %
  243. (self.table, table_name))
  244. Debug('Table is [%s]' % self.table)
  245. Debug('self.tokens is %s' % self.tokens)
  246. def _ParseTargets(self):
  247. """Parse out name value pairs of columns and their values.
  248. Raises:
  249. ParseError: Unable to parse targets
  250. """
  251. self.targets = {}
  252. # UPDATE
  253. if self.command == 'UPDATE':
  254. if self.tokens.pop(0) != 'SET':
  255. raise ParseError('UPDATE command must be followed by SET')
  256. self.targets = self._ParsePairs(' '.join(self.tokens), ',')
  257. # INSERT
  258. if self.command == 'INSERT':
  259. if self.tokens[0] == 'SET':
  260. self.targets = self._ParsePairs(' '.join(self.tokens[1:]), ',')
  261. elif len(self.tokens) == 3 and self.tokens[1] == 'VALUES':
  262. if not self.tokens[0].startswith(self._paren_placeholder):
  263. raise ParseError('INSERT column names must be inside '
  264. 'parens')
  265. if not self.tokens[2].startswith(self._paren_placeholder):
  266. raise ParseError('INSERT values must be inside parens')
  267. cols = self._ReplaceParens(self.tokens[0]).split(',')
  268. vals = self._ReplaceParens(self.tokens[2]).split(',')
  269. if len(cols) != len(vals):
  270. raise ParseError('INSERT column and value numbers must '
  271. 'match')
  272. if not cols: # len == 0
  273. raise ParseError('INSERT column number must be greater '
  274. 'than 0')
  275. i = 0
  276. while i < len(cols):
  277. val = vals[i].strip()
  278. if not val: # val == ''
  279. raise ParseError('INSERT values cannot be empty')
  280. if ' ' in val:
  281. raise ParseError('INSERT values must be comma '
  282. 'separated')
  283. self.targets[cols[i].strip()] = \
  284. self._ReplaceStringLiterals(val)
  285. i += 1
  286. else:
  287. raise ParseError('Unable to parse INSERT targets')
  288. for target in self.targets:
  289. self.targets[target] = self._EscapeChars(self.targets[target])
  290. Debug('Targets are [%s]' % self.targets)
  291. def _EscapeChars(self, value):
  292. """Escape necessary chars before inserting into dbtext.
  293. Args:
  294. value: 'string'
  295. Returns:
  296. escaped: 'string' with chars escaped appropriately
  297. """
  298. # test that the value is string, if not return it as is
  299. try:
  300. value.find('a')
  301. except Exception:
  302. return value
  303. escaped = value
  304. escaped = escaped.replace('\\', '\\\\').replace('\0', '\\0')
  305. escaped = escaped.replace(':', '\\:').replace('\n', '\\n')
  306. escaped = escaped.replace('\r', '\\r').replace('\t', '\\t')
  307. return escaped
  308. def _UnEscapeChars(self, value):
  309. """Un-escape necessary chars before returning to user.
  310. Args:
  311. value: 'string'
  312. Returns:
  313. escaped: 'string' with chars escaped appropriately
  314. """
  315. # test that the value is string, if not return it as is
  316. try:
  317. value.find('a')
  318. except Exception:
  319. return value
  320. escaped = value
  321. escaped = escaped.replace('\\:', ':').replace('\\n', '\n')
  322. escaped = escaped.replace('\\r', '\r').replace('\\t', '\t')
  323. escaped = escaped.replace('\\0', '\0').replace('\\\\', '\\')
  324. return escaped
  325. def Execute(self, query, writethru=True):
  326. """Parse and execute the query.
  327. Args:
  328. query: e.g. 'select * from table;'
  329. writethru: bool
  330. Returns:
  331. dataset: [{col: val, col: val}, {col: val}, {col: val}]
  332. Raises:
  333. ExecuteError: unable to execute query
  334. """
  335. # parse the query
  336. self.ParseQuery(query)
  337. # get lock and execute the query
  338. self.OpenTable()
  339. Debug('Running ' + self.command)
  340. dataset = []
  341. if self.command == 'SELECT':
  342. dataset = self._RunSelect()
  343. elif self.command == 'UPDATE':
  344. dataset = self._RunUpdate()
  345. elif self.command == 'INSERT':
  346. dataset = self._RunInsert()
  347. elif self.command == 'DELETE':
  348. dataset = self._RunDelete()
  349. if self.command != 'SELECT' and writethru:
  350. self.WriteTempTable()
  351. self.MoveTableIntoPlace()
  352. Debug(dataset)
  353. return dataset
  354. def CleanUp(self):
  355. """Reset the internal variables (for multiple queries)."""
  356. self.tokens = [] # query broken up into tokens
  357. self.conditions = {} # args to the WHERE clause
  358. self.columns = [] # columns requested by SELECT
  359. self.table = '' # name of the table being queried
  360. self.header = {} # table header
  361. self.orig_data = [] # original table data used to diff after updates
  362. self.data = [] # table data as a list of dicts
  363. self.count = False # where or not using COUNT()
  364. self.aliases = {} # column aliases (SELECT AS)
  365. self.targets = {} # target columns-value pairs for INSERT/UPDATE
  366. self.args = '' # query arguments preceding the ;
  367. self.command = '' # which command are we executing
  368. self.strings = [] # list of string literals parsed from the query
  369. self.parens = [] # list of parentheses parsed from the query
  370. if getattr(self, 'fd', False):
  371. self.fd.close()
  372. def ParseQuery(self, query):
  373. """External wrapper for the query parsing routines.
  374. Args:
  375. query: string
  376. Raises:
  377. ParseError: Unable to parse query
  378. """
  379. self.args = query.split(';')[0]
  380. self._Tokenize()
  381. self._ParseCommand()
  382. self._ParseOrderBy()
  383. self._ParseConditions()
  384. self._ParseColumns()
  385. self._ParseTable()
  386. self._ParseTargets()
  387. def _ParseCommand(self):
  388. """Determine the command: SELECT, UPDATE, DELETE or INSERT.
  389. Raises:
  390. ParseError: unable to parse command
  391. """
  392. self.command = self.tokens[0]
  393. # Check that command is valid
  394. if self.command not in self.ALL_COMMANDS:
  395. raise ParseError('Unsupported command: ' + self.command)
  396. self.tokens.pop(0)
  397. Debug('Command is: %s' % self.command)
  398. Debug('self.tokens: %s' % self.tokens)
  399. def _Tokenize(self):
  400. """Turn the string query into a list of tokens.
  401. Split on '(', ')', ' ', ';', '=' and ','.
  402. In addition capitalize any SQL keywords found.
  403. """
  404. # horrible hack to handle now()
  405. time_now = '%s' % int(time.time())
  406. # round off the seconds for unittesting
  407. time_now = time_now[0:-2] + '00'
  408. while 'now()' in self.args.lower():
  409. start = self.args.lower().find('now()')
  410. self.args = ('%s%s%s' % (self.args[0:start], time_now,
  411. self.args[start + 5:]))
  412. # pad token separators with spaces
  413. pad = self.args.replace('(', ' ( ').replace(')', ' ) ')
  414. pad = pad.replace(',', ' , ').replace(';', ' ; ').replace('=', ' = ')
  415. self.args = pad
  416. # parse out all the blocks (string literals and parens)
  417. self._ParseOutBlocks()
  418. # split remaining into tokens
  419. self.tokens = self.args.split()
  420. # now capitalize
  421. i = 0
  422. while i < len(self.tokens):
  423. if self.tokens[i].upper() in self.RESERVED_WORDS:
  424. self.tokens[i] = self.tokens[i].upper()
  425. i += 1
  426. Debug('Tokens: %s' % self.tokens)
  427. def _ParseOutBlocks(self):
  428. """Parse out string literals and parenthesized values."""
  429. self.strings = []
  430. self.parens = []
  431. # set str placeholder to a value that's not present in the string
  432. while self._str_placeholder in self.args:
  433. self._str_placeholder = '%s_' % self._str_placeholder
  434. # set paren placeholder to a value that's not present in the string
  435. while self._paren_placeholder in self.args:
  436. self._paren_placeholder = '%s_' % self._paren_placeholder
  437. self.strings = self._ParseOutHelper(self._str_placeholder, ["'", '"'],
  438. 'quotes')
  439. self.parens = self._ParseOutHelper(self._paren_placeholder, ['(', ')'],
  440. 'parens')
  441. Debug('Strings: %s' % self.strings)
  442. Debug('Parens: %s' % self.parens)
  443. def _ParseOutHelper(self, placeholder, delims, mode):
  444. """Replace all text within delims with placeholders.
  445. Args:
  446. placeholder: string
  447. delims: list of strings
  448. mode: string
  449. 'parens': if there are 2 delims treat the first as opening
  450. and second as closing, such as with ( and )
  451. 'quotes': treat each delim as either opening or
  452. closing and require the same one to terminate the
  453. block, such as with ' and "
  454. Returns:
  455. list: [value1, value2, ...]
  456. Raises:
  457. ParseError: unable to parse out delims
  458. ExecuteError: Invalid usage
  459. """
  460. if mode not in ['quotes', 'parens']:
  461. raise ExecuteError('_ParseOutHelper: invalid mode ' + mode)
  462. if mode == 'parens' and len(delims) != 2:
  463. raise ExecuteError('_ParseOutHelper: delims must have 2 values '
  464. 'in "parens" mode')
  465. values = []
  466. started = 0
  467. new_args = ''
  468. string = ''
  469. my_id = 0
  470. delim = ''
  471. for c in self.args:
  472. if c in delims:
  473. if not started:
  474. if mode == 'parens' and c != delims[0]:
  475. raise ParseError('Found closing delimiter %s before '
  476. 'corresponding %s' % (c, delims[0]))
  477. started += 1
  478. delim = c
  479. else:
  480. if ((mode == 'parens' and c == delim) or
  481. (mode == 'quotes' and c != delim)):
  482. string = '%s%s' % (string, c)
  483. continue # wait for matching delim
  484. started -= 1
  485. if not started:
  486. values.append(string)
  487. new_args = '%s %s' % (new_args, '%s%d' % (placeholder,
  488. my_id))
  489. my_id += 1
  490. string = ''
  491. else:
  492. if not started:
  493. new_args = '%s%s' % (new_args, c)
  494. else:
  495. string = '%s%s' % (string, c)
  496. if started:
  497. if mode == 'parens':
  498. waiting_for = delims[1]
  499. else:
  500. waiting_for = delim
  501. raise ParseError('Unterminated block, waiting for ' + waiting_for)
  502. self.args = new_args
  503. Debug('Values: %s' % values)
  504. return values
  505. def _ReplaceStringLiterals(self, s, quotes=False):
  506. """Replaces string placeholders with real values.
  507. If quotes is set to True surround the returned value with single
  508. quotes
  509. Args:
  510. s: string
  511. quotes: bool
  512. Returns:
  513. s: string
  514. """
  515. if s.strip().startswith(self._str_placeholder):
  516. str_index = int(s.split(self._str_placeholder)[1])
  517. s = self.strings[str_index]
  518. if quotes:
  519. s = "'" + s + "'"
  520. return s
  521. def _ReplaceParens(self, s):
  522. """Replaces paren placeholders with real values.
  523. Args:
  524. s: string
  525. Returns:
  526. s: string
  527. """
  528. if s.strip().startswith(self._paren_placeholder):
  529. str_index = int(s.split(self._paren_placeholder)[1])
  530. s = self.parens[str_index].strip()
  531. return s
  532. def _RunDelete(self):
  533. """Run the DELETE command.
  534. Go through the rows in self.data matching them against the conditions,
  535. if they fit delete the row leaving a placeholder value (in order to
  536. keep the iteration process sane). Afterward clean up any empty values.
  537. Returns:
  538. dataset: [number of affected rows]
  539. """
  540. i = 0
  541. length = len(self.data)
  542. affected = 0
  543. while i < length:
  544. if self._MatchRow(self.data[i]):
  545. self.data[i] = None
  546. affected += 1
  547. i += 1
  548. # clean out the placeholders
  549. while None in self.data:
  550. self.data.remove(None)
  551. return [affected]
  552. def _RunUpdate(self):
  553. """Run the UPDATE command.
  554. Find the matching rows and update based on self.targets
  555. Returns:
  556. affected: [int]
  557. Raises:
  558. ExecuteError: failed to run UPDATE
  559. """
  560. i = 0
  561. length = len(self.data)
  562. affected = 0
  563. while i < length:
  564. if self._MatchRow(self.data[i]):
  565. for target in self.targets:
  566. if target not in self.header:
  567. raise ExecuteError(target + ' is an invalid column ' +
  568. 'name')
  569. if self.header[target]['auto']:
  570. raise ExecuteError(target + ' is type auto and ' +
  571. 'cannot be updated')
  572. self.data[i][target] = \
  573. self._TypeCheck(self.targets[target], target)
  574. affected += 1
  575. i += 1
  576. return [affected]
  577. def _RunInsert(self):
  578. """Run the INSERT command.
  579. Build up the row based on self.targets and table defaults, then
  580. append to self.data
  581. Returns:
  582. affected: [int]
  583. Raises:
  584. ExecuteError: failed to run INSERT
  585. """
  586. new_row = {}
  587. cols = self._SortHeaderColumns()
  588. for col in cols:
  589. if col in self.targets:
  590. if self.header[col]['auto']:
  591. raise ExecuteError(col + ' is type auto: cannot be ' +
  592. 'modified')
  593. new_row[col] = self.targets[col]
  594. elif self.header[col]['null']:
  595. new_row[col] = ''
  596. elif self.header[col]['auto']:
  597. new_row[col] = self._GetNextAuto(col)
  598. else:
  599. raise ExecuteError(col + ' cannot be empty or null')
  600. self.data.append(new_row)
  601. return [1]
  602. def _GetNextAuto(self, col):
  603. """Figure out the next value for col based on existing values.
  604. Scan all the current values and return the highest one + 1.
  605. Args:
  606. col: string
  607. Returns:
  608. next: int
  609. Raises:
  610. ExecuteError: Failed to get auto inc
  611. """
  612. highest = 0
  613. seen = []
  614. for row in self.data:
  615. if row[col] > highest:
  616. highest = row[col]
  617. if row[col] not in seen:
  618. seen.append(row[col])
  619. else:
  620. raise ExecuteError('duplicate value %s in %s' %
  621. (row[col], col))
  622. return highest + 1
  623. def _RunSelect(self):
  624. """Run the SELECT command.
  625. Returns:
  626. dataset: []
  627. Raises:
  628. ExecuteError: failed to run SELECT
  629. """
  630. dataset = []
  631. if ['*'] == self.columns:
  632. self.columns = self._SortHeaderColumns()
  633. for row in self.data:
  634. if self._MatchRow(row):
  635. match = []
  636. for col in self.columns:
  637. if col in self.aliases:
  638. concat = ''
  639. for concat_col in self.aliases[col]:
  640. if concat_col.startswith("'") and \
  641. concat_col.endswith("'"):
  642. concat += concat_col.strip("'")
  643. elif concat_col not in self.header.keys():
  644. raise ExecuteError('Table %s does not have ' +
  645. 'a column %s' %
  646. (self.table, concat_col))
  647. else:
  648. concat = '%s%s' % (concat, row[concat_col])
  649. if not concat.strip():
  650. raise ExecuteError('Empty CONCAT statement')
  651. my_match = concat
  652. elif col.startswith("'") and col.endswith("'"):
  653. my_match = col.strip("'")
  654. elif col not in self.header.keys():
  655. raise ExecuteError('Table %s does not have a column ' +
  656. '%s' % (self.table, col))
  657. else:
  658. my_match = row[col]
  659. match.append(self._UnEscapeChars(my_match))
  660. dataset.append(match)
  661. if self.count:
  662. Debug('Dataset: %s' % dataset)
  663. dataset = [len(dataset)]
  664. if self.order_by:
  665. if self.order_by not in self.header.keys():
  666. raise ExecuteError('Unknown column %s in ORDER BY clause' %
  667. self.order_by)
  668. pos = self._PositionByCol(self.order_by)
  669. dataset = self._SortMatrixByCol(dataset, pos)
  670. return dataset
  671. def _SortMatrixByCol(self, dataset, pos):
  672. """Sorts the matrix (array or arrays) based on a given column value.
  673. That is, if given matrix that looks like:
  674. [[1, 2, 3], [6, 5, 4], [3, 2, 1]]
  675. given pos = 0 produce:
  676. [[1, 2, 3], [3, 2, 1], [6, 5, 4]]
  677. given pos = 1 produce:
  678. [[1, 2, 3], [3, 2, 1], [6, 5, 4]]
  679. given pos = 2 produce:
  680. [[3, 2, 1], [1, 2, 3], [6, 5, 4]]
  681. Works for both integer and string values of column.
  682. Args:
  683. dataset: [[], [], ...]
  684. pos: int
  685. Returns:
  686. sorted: [[], [], ...]
  687. """
  688. # prepend value in pos to the beginning of every row
  689. i = 0
  690. while i < len(dataset):
  691. dataset[i].insert(0, dataset[i][pos])
  692. i += 1
  693. # sort the matrix, which is done on the row we just prepended
  694. dataset.sort()
  695. # strip away the first value
  696. i = 0
  697. while i < len(dataset):
  698. dataset[i].pop(0)
  699. i += 1
  700. return dataset
  701. def _MatchRow(self, row):
  702. """Matches the row against self.conditions.
  703. Args:
  704. row: ['val', 'val']
  705. Returns:
  706. Bool
  707. """
  708. match = True
  709. # when there are no conditions we match everything
  710. if not self.conditions:
  711. return match
  712. for condition in self.conditions:
  713. cond_val = self.conditions[condition]
  714. if condition not in self.header.keys():
  715. match = False
  716. break
  717. else:
  718. if cond_val != row[condition]:
  719. match = False
  720. break
  721. return match
  722. def _ProcessHeader(self):
  723. """Parse out the header information.
  724. Returns:
  725. {col_name:
  726. {'type': string,
  727. 'null': string,
  728. 'auto': string,
  729. 'pos': int
  730. }
  731. }
  732. """
  733. header = self.fd.readline().strip()
  734. cols = {}
  735. pos = 0
  736. for col in header.split():
  737. col_name = col.split('(')[0]
  738. col_type = col.split('(')[1].split(')')[0].split(',')[0]
  739. col_null = False
  740. col_auto = False
  741. if ',' in col.split('(')[1].split(')')[0]:
  742. if col.split('(')[1].split(')')[0].split(',')[1].lower() == \
  743. 'null':
  744. col_null = True
  745. if col.split('(')[1].split(')')[0].split(',')[1].lower() == \
  746. 'auto':
  747. col_auto = True
  748. cols[col_name] = {}
  749. cols[col_name]['type'] = col_type
  750. cols[col_name]['null'] = col_null
  751. cols[col_name]['auto'] = col_auto
  752. cols[col_name]['pos'] = pos
  753. pos += 1
  754. return cols
  755. def _GetData(self):
  756. """Reads table data into memory as a list of dicts keyed on column
  757. names.
  758. Returns:
  759. data: [{row}, {row}, ...]
  760. Raises:
  761. ExecuteError: failed to get data
  762. """
  763. data = []
  764. row_num = 0
  765. for row in self.fd:
  766. row = row.rstrip('\n')
  767. row_dict = {}
  768. i = 0
  769. field_start = 0
  770. field_num = 0
  771. while i < len(row):
  772. if row[i] == ':':
  773. # the following block is executed again after the
  774. # while is done
  775. val = row[field_start:i]
  776. col = self._ColByPosition(field_num)
  777. val = self._TypeCheck(val, col)
  778. row_dict[col] = val
  779. field_start = i + 1 # skip the colon itself
  780. field_num += 1
  781. if row[i] == '\\':
  782. i += 2 # skip the next char since it's escaped
  783. else:
  784. i += 1
  785. # handle the last field since we won't hit a : at the end
  786. # sucks to duplicate the code outside the loop but I can't think
  787. # of a better way :(
  788. val = row[field_start:i]
  789. col = self._ColByPosition(field_num)
  790. val = self._TypeCheck(val, col)
  791. row_dict[col] = val
  792. # verify that all columns were created
  793. for col in self.header:
  794. if col not in row_dict:
  795. raise ExecuteError('%s is missing from row %d in %s' %
  796. (col, row_num, self.table))
  797. row_num += 1
  798. data.append(row_dict)
  799. return data
  800. def _TypeCheck(self, val, col):
  801. """Verify type of val based on the header.
  802. Make sure the value is returned in quotes if it's a string
  803. and as '' when it's empty and Null
  804. Args:
  805. val: string
  806. col: string
  807. Returns:
  808. val: string
  809. Raises:
  810. ExecuteError: invalid value or column
  811. """
  812. if not val and not self.header[col]['null']:
  813. raise ExecuteError(col + ' cannot be empty or null')
  814. hdr_t = self.header[col]['type'].lower()
  815. if hdr_t == 'int' or hdr_t == 'double':
  816. try:
  817. if val:
  818. val = eval(val)
  819. except NameError as e:
  820. raise ExecuteError('Failed to parse %s in %s '
  821. '(unable to convert to type %s): %s' %
  822. (col, self.table, hdr_t, e))
  823. except SyntaxError as e:
  824. raise ExecuteError('Failed to parse %s in %s '
  825. '(unable to convert to type %s): %s' %
  826. (col, self.table, hdr_t, e))
  827. return val
  828. def _ColByPosition(self, pos):
  829. """Returns column name based on position.
  830. Args:
  831. pos: int
  832. Returns:
  833. column: string
  834. Raises:
  835. ExecuteError: invalid column
  836. """
  837. for col in self.header:
  838. if self.header[col]['pos'] == pos:
  839. return col
  840. raise ExecuteError('Header does not contain column %d' % pos)
  841. def _PositionByCol(self, col):
  842. """Returns position of the column based on the name.
  843. Args:
  844. col: string
  845. Returns:
  846. pos: int
  847. Raises:
  848. ExecuteError: invalid column
  849. """
  850. if col not in self.header.keys():
  851. raise ExecuteError(col + ' is not a valid column name')
  852. return self.header[col]['pos']
  853. def _SortHeaderColumns(self):
  854. """Sort column names by position.
  855. Returns:
  856. sorted: [col1, col2, ...]
  857. Raises:
  858. ExecuteError: unable to sort header
  859. """
  860. cols = self.header.keys()
  861. sorted_cols = [''] * len(cols)
  862. for col in cols:
  863. pos = self.header[col]['pos']
  864. sorted_cols[pos] = col
  865. if '' in sorted_cols:
  866. raise ExecuteError('Unable to sort header columns: %s' % cols)
  867. return sorted_cols
  868. def OpenTable(self):
  869. """Opens the table file and places its content into memory.
  870. Raises:
  871. ExecuteError: unable to open table
  872. """
  873. # if we already have a header assume multiple queries on same table
  874. # (can't use self.data in case the table was empty to begin with)
  875. if self.header:
  876. return
  877. try:
  878. self.fd = open(os.path.join(self.location, self.table), 'r')
  879. self.header = self._ProcessHeader()
  880. if self.command in ['INSERT', 'DELETE', 'UPDATE']:
  881. fcntl.flock(self.fd, fcntl.LOCK_EX)
  882. self.data = self._GetData()
  883. # save a copy of the data before modifying
  884. self.orig_data = self.data[:]
  885. except IOError as e:
  886. raise ExecuteError('Unable to open table %s: %s' % (self.table, e))
  887. Debug('Header is: %s' % self.header)
  888. # type check the conditions
  889. for cond in self.conditions:
  890. if cond not in self.header.keys():
  891. raise ExecuteError('unknown column %s in WHERE clause' % cond)
  892. self.conditions[cond] = self._TypeCheck(self.conditions[cond],
  893. cond)
  894. # type check the targets
  895. for target in self.targets:
  896. if target not in self.header.keys():
  897. raise ExecuteError('unknown column in targets: %s' % target)
  898. self.targets[target] = self._TypeCheck(self.targets[target],
  899. target)
  900. Debug('Type checked conditions: %s' % self.conditions)
  901. Debug('Data is:')
  902. for row in self.data:
  903. Debug('=======================')
  904. Debug(row)
  905. Debug('=======================')
  906. def WriteTempTable(self):
  907. """Write table header and data.
  908. First write header and data to a temp file,
  909. then move the tmp file to replace the original table file.
  910. """
  911. self.temp_file = tempfile.NamedTemporaryFile()
  912. Debug('temp_file: ' + self.temp_file.name)
  913. # write header
  914. columns = self._SortHeaderColumns()
  915. header = ''
  916. for col in columns:
  917. header = '%s %s' % (header, col)
  918. header = '%s(%s' % (header, self.header[col]['type'])
  919. if self.header[col]['null']:
  920. header = '%s,null)' % header
  921. elif self.header[col]['auto']:
  922. header = '%s,auto)' % header
  923. else:
  924. header = '%s)' % header
  925. self.temp_file.write((header.strip() + '\n').encode())
  926. # write data
  927. for row in self.data:
  928. row_str = ''
  929. for col in columns:
  930. row_str = '%s:%s' % (row_str, row[col])
  931. self.temp_file.write((row_str[1:] + '\n').encode())
  932. self.temp_file.flush()
  933. def MoveTableIntoPlace(self):
  934. """Replace the real table with the temp one.
  935. Diff the new data against the original and replace the table when they
  936. are different.
  937. """
  938. if self.data != self.orig_data:
  939. temp_file = self.temp_file.name
  940. table_file = os.path.join(self.location, self.table)
  941. Debug('Copying %s to %s' % (temp_file, table_file))
  942. shutil.copy(self.temp_file.name, self.location + '/' + self.table)
  943. def _ParsePairs(self, s, delimiter):
  944. """Parses out name value pairs from a string.
  945. String contains name=value pairs
  946. separated by a delimiter (such as "and" or ",")
  947. Args:
  948. s: string
  949. delimiter: string
  950. Returns:
  951. my_dict: dictionary
  952. Raises:
  953. ParseError: unable to parse pairs
  954. """
  955. my_dict = {}
  956. Debug('parse pairs: [%s]' % s)
  957. pairs = s.split(delimiter)
  958. for pair in pairs:
  959. if '=' not in pair:
  960. raise ParseError('Invalid condition pair: ' + pair)
  961. split = pair.split('=')
  962. Debug('split: %s' % split)
  963. if len(split) != 2:
  964. raise ParseError('Invalid condition pair: ' + pair)
  965. col = split[0].strip()
  966. if not col or not split[1].strip() or ' ' in col:
  967. raise ParseError('Invalid condition pair: ' + pair)
  968. val = self._ReplaceStringLiterals(split[1].strip())
  969. my_dict[col] = val
  970. return my_dict
  971. class Error(Exception):
  972. """DBText error."""
  973. class ParseError(Error):
  974. """Parse error."""
  975. class NotSupportedError(Error):
  976. """Not Supported error."""
  977. class ExecuteError(Error):
  978. """Execute error."""
  979. def main(argv):
  980. if len(argv) < 2:
  981. print('Usage %s query' % argv[0])
  982. sys.exit(1)
  983. if 'DBTEXT_PATH' not in os.environ or not os.environ['DBTEXT_PATH']:
  984. print('DBTEXT_PATH must be set')
  985. sys.exit(1)
  986. else:
  987. location = os.environ['DBTEXT_PATH']
  988. try:
  989. conn = DBText(location)
  990. dataset = conn.Execute(' '.join(argv[1:]))
  991. if dataset:
  992. for row in dataset:
  993. if conn.command != 'SELECT':
  994. print('Updated %s, rows affected: %d' % (conn.table, row))
  995. else:
  996. print(row)
  997. except Error as e:
  998. print(e)
  999. sys.exit(1)
  1000. if __name__ == '__main__':
  1001. main(sys.argv)