12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199 |
- #!/usr/bin/python
- #
- # Copyright 2008 Google Inc. All Rights Reserved.
- """SQL-like access layer for dbtext.
- This module provides the glue for kamctl to interact with dbtext files
- using basic SQL syntax thus avoiding special case handling of dbtext.
- """
- __author__ = '[email protected] (Herman Sheremetyev)'
- import fcntl
- import os
- import shutil
- import sys
- import tempfile
- import time
- if 'DBTEXTDB_DEBUG' in os.environ:
- DEBUG = os.environ['DBTEXTDB_DEBUG']
- else:
- DEBUG = 0
- def Debug(msg):
- """Debug print method."""
- if DEBUG:
- print msg
- class DBText(object):
- """Provides connection to a dbtext database."""
- RESERVED_WORDS = ['SELECT', 'DELETE', 'UPDATE', 'INSERT', 'SET',
- 'VALUES', 'INTO', 'FROM', 'ORDER', 'BY', 'WHERE',
- 'COUNT', 'CONCAT', 'AND', 'AS']
- ALL_COMMANDS = ['SELECT', 'DELETE', 'UPDATE', 'INSERT']
- WHERE_COMMANDS = ['SELECT', 'DELETE', 'UPDATE']
- def __init__(self, location):
- self.location = location # location of dbtext tables
- self.tokens = [] # query broken up into tokens
- self.conditions = {} # args to the WHERE clause
- self.columns = [] # columns requested by SELECT
- self.table = '' # name of the table being queried
- self.header = {} # table header
- self.orig_data = [] # original table data used to diff after updates
- self.data = [] # table data as a list of dicts
- self.count = False # where or not using COUNT()
- self.aliases = {} # column aliases (SELECT AS)
- self.targets = {} # target columns-value pairs for INSERT/UPDATE
- self.args = '' # query arguments preceeding the ;
- self.command = '' # which command are we executing
- self.strings = [] # list of string literals parsed from the query
- self.parens = [] # list of parentheses parsed from the query
- self._str_placeholder = '__DBTEXTDB_PARSED_OUT_STRING__'
- self._paren_placeholder = '__DBTEXTDB_PARSED_OUT_PARENS__'
- if not os.path.isdir(location):
- raise ParseError(location + ' is not a directory')
- def _ParseOrderBy(self):
- """Parse out the column name to be used for ordering the dataset.
- Raises:
- ParseError: Invalid ORDER BY clause
- """
- self.order_by = ''
- if 'ORDER' in self.tokens:
- order_index = self.tokens.index('ORDER')
- if order_index != len(self.tokens) - 3:
- raise ParseError('ORDER must be followed with BY and column name')
- if self.tokens[order_index + 1] != 'BY':
- raise ParseError('ORDER must be followed with BY')
- self.order_by = self.tokens[order_index + 2]
- # strip off the order by stuff
- self.tokens.pop() # column name
- self.tokens.pop() # BY
- self.tokens.pop() # ORDER
- elif 'BY' in self.tokens:
- raise ParseError('BY must be preceeded by ORDER')
- Debug('Order by: ' + self.order_by)
- def _ParseConditions(self):
- """Parse out WHERE clause.
- Take everything after the WHERE keyword and convert it to a dict of
- name value pairs corresponding to the columns and their values that
- should be matched.
- Raises:
- ParseError: Invalid WHERE clause
- NotSupportedError: Unsupported syntax
- """
- self.conditions = {}
- Debug('self.tokens = %s' % self.tokens)
- if 'WHERE' not in self.tokens:
- return
- if self.command not in self.WHERE_COMMANDS:
- raise ParseError(self.command + ' cannot have a WHERE clause')
- if 'OR' in self.tokens:
- raise NotSupportedError('WHERE clause does not support OR operator')
- where_clause = self.tokens[self.tokens.index('WHERE') + 1:]
- self.conditions = self._ParsePairs(' '.join(where_clause), 'AND')
- for cond in self.conditions:
- self.conditions[cond] = self._EscapeChars(self.conditions[cond])
- Debug('Conditions are [%s]' % self.conditions)
- # pop off where clause
- a = self.tokens.pop()
- while a != 'WHERE':
- a = self.tokens.pop()
- Debug('self.tokens: %s' % self.tokens)
- def _ParseColumns(self):
- """Parse out the columns that need to be selected.
- Raises:
- ParseError: Invalid SELECT syntax
- """
- self.columns = []
- self.count = False
- self.aliases = {}
- col_end = 0
- # this is only valid for SELECT
- if self.command != 'SELECT':
- return
- if 'FROM' not in self.tokens:
- raise ParseError('SELECT must be followed by FROM')
- col_end = self.tokens.index('FROM')
- if not col_end: # col_end == 0
- raise ParseError('SELECT must be followed by column name[s]')
- cols_str = ' '.join(self.tokens[0:col_end])
- # check if there is a function modifier on the columns
- if self.tokens[0] == 'COUNT':
- self.count = True
- if col_end == 1:
- raise ParseError('COUNT must be followed by column name[s]')
- if not self.tokens[1].startswith(self._paren_placeholder):
- raise ParseError('COUNT must be followed by ()')
- cols_str = self._ReplaceParens(self.tokens[1])
- cols = cols_str.split(',')
- for col in cols:
- if not col.strip():
- raise ParseError('Extra comma in columns')
- col_split = col.split()
- if col_split[0] == 'CONCAT':
- # found a concat statement, do the same overall steps for those cols
- self._ParseColumnsConcatHelper(col_split)
- else:
- col_split = col.split()
- if len(col_split) > 2 and col_split[1] != 'AS':
- raise ParseError('multiple columns must be separated by a comma')
- elif len(col_split) == 3:
- if col_split[1] != 'AS':
- raise ParseError('Invalid column alias, use AS')
- my_key = self._ReplaceStringLiterals(col_split[2], quotes=True)
- my_val = self._ReplaceStringLiterals(col_split[0], quotes=True)
- self.aliases[my_key] = [my_val]
- self.columns.append(my_key)
- elif len(col_split) > 3:
- raise ParseError('multiple columns must be separated by a comma')
- elif len(col_split) == 2: # alias
- my_key = self._ReplaceStringLiterals(col_split[1], quotes=True)
- my_val = self._ReplaceStringLiterals(col_split[0], quotes=True)
- self.aliases[my_key] = [my_val]
- self.columns.append(my_key)
- else:
- col = self._ReplaceStringLiterals(col, quotes=True).strip()
- if not col: # col == ''
- raise ParseError('empty column name not allowed')
- self.columns.append(col)
- # pop off all the columns related junk
- self.tokens = self.tokens[col_end + 1:]
- Debug('Columns: %s' % self.columns)
- Debug('Aliases: %s' % self.aliases)
- Debug('self.tokens: %s' % self.tokens)
- def _ParseColumnsConcatHelper(self, col_split):
- """Handles the columns being CONCAT'd together.
- Args:
- col_split: ['column', 'column']
- Raises:
- ParseError: invalid CONCAT()
- """
- concat_placeholder = '_'
- split_len = len(col_split)
- if split_len == 1:
- raise ParseError('CONCAT() must be followed by column name[s]')
- if not col_split[1].startswith(self._paren_placeholder):
- raise ParseError('CONCAT must be followed by ()')
- if split_len > 2:
- if split_len == 4 and col_split[2] != 'AS':
- raise ParseError('CONCAT() must be followed by an AS clause')
- if split_len > 5:
- raise ParseError('CONCAT() AS clause takes exactly 1 arg. '
- 'Extra args: [%s]' % (col_split[4:]))
- else:
- concat_placeholder = self._ReplaceStringLiterals(col_split[-1],
- quotes=True)
- # make sure this place hodler is unique
- while concat_placeholder in self.aliases:
- concat_placeholder += '_'
- concat_cols_str = self._ReplaceParens(col_split[1])
- concat_cols = concat_cols_str.split(',')
- concat_col_list = []
- for concat_col in concat_cols:
- if ' ' in concat_col.strip():
- raise ParseError('multiple columns must be separated by a'
- ' comma inside CONCAT()')
- concat_col = self._ReplaceStringLiterals(concat_col, quotes=True).strip()
- if not concat_col:
- raise ParseError('Attempting to CONCAT empty set')
- concat_col_list.append(concat_col)
- self.aliases[concat_placeholder] = concat_col_list
- self.columns.append(concat_placeholder)
- def _ParseTable(self):
- """Parse out the table name (multiple table names not supported).
- Raises:
- ParseError: Unable to parse table name
- """
- table_name = ''
- if (not self.tokens or # len == 0
- (self.tokens[0] in self.RESERVED_WORDS and
- self.tokens[0] not in ['FROM', 'INTO'])):
- raise ParseError('Missing table name')
- # SELECT
- if self.command == 'SELECT':
- table_name = self.tokens.pop(0)
- # INSERT
- elif self.command == 'INSERT':
- table_name = self.tokens.pop(0)
- if table_name == 'INTO':
- table_name = self.tokens.pop(0)
- # DELETE
- elif self.command == 'DELETE':
- if self.tokens[0] != 'FROM':
- raise ParseError('DELETE command must be followed by FROM')
- self.tokens.pop(0) # FROM
- table_name = self.tokens.pop(0)
- # UPDATE
- elif self.command == 'UPDATE':
- table_name = self.tokens.pop(0)
- if not self.table:
- self.table = table_name
- else: # multiple queries detected, make sure they're against same table
- if self.table != table_name:
- raise ParseError('Table changed between queries! %s -> %s' %
- (self.table, table_name))
- Debug('Table is [%s]' % self.table)
- Debug('self.tokens is %s' % self.tokens)
- def _ParseTargets(self):
- """Parse out name value pairs of columns and their values.
- Raises:
- ParseError: Unable to parse targets
- """
- self.targets = {}
- # UPDATE
- if self.command == 'UPDATE':
- if self.tokens.pop(0) != 'SET':
- raise ParseError('UPDATE command must be followed by SET')
- self.targets = self._ParsePairs(' '.join(self.tokens), ',')
- # INSERT
- if self.command == 'INSERT':
- if self.tokens[0] == 'SET':
- self.targets = self._ParsePairs(' '.join(self.tokens[1:]), ',')
- elif len(self.tokens) == 3 and self.tokens[1] == 'VALUES':
- if not self.tokens[0].startswith(self._paren_placeholder):
- raise ParseError('INSERT column names must be inside parens')
- if not self.tokens[2].startswith(self._paren_placeholder):
- raise ParseError('INSERT values must be inside parens')
- cols = self._ReplaceParens(self.tokens[0]).split(',')
- vals = self._ReplaceParens(self.tokens[2]).split(',')
- if len(cols) != len(vals):
- raise ParseError('INSERT column and value numbers must match')
- if not cols: # len == 0
- raise ParseError('INSERT column number must be greater than 0')
- i = 0
- while i < len(cols):
- val = vals[i].strip()
- if not val: # val == ''
- raise ParseError('INSERT values cannot be empty')
- if ' ' in val:
- raise ParseError('INSERT values must be comma separated')
- self.targets[cols[i].strip()] = self._ReplaceStringLiterals(val)
- i += 1
- else:
- raise ParseError('Unable to parse INSERT targets')
- for target in self.targets:
- self.targets[target] = self._EscapeChars(self.targets[target])
- Debug('Targets are [%s]' % self.targets)
- def _EscapeChars(self, value):
- """Escape necessary chars before inserting into dbtext.
- Args:
- value: 'string'
- Returns:
- escaped: 'string' with chars escaped appropriately
- """
- # test that the value is string, if not return it as is
- try:
- value.find('a')
- except:
- return value
- escaped = value
- escaped = escaped.replace('\\', '\\\\').replace('\0', '\\0')
- escaped = escaped.replace(':', '\\:').replace('\n', '\\n')
- escaped = escaped.replace('\r', '\\r').replace('\t', '\\t')
- return escaped
- def _UnEscapeChars(self, value):
- """Un-escape necessary chars before returning to user.
- Args:
- value: 'string'
- Returns:
- escaped: 'string' with chars escaped appropriately
- """
- # test that the value is string, if not return it as is
- try:
- value.find('a')
- except:
- return value
- escaped = value
- escaped = escaped.replace('\\:', ':').replace('\\n', '\n')
- escaped = escaped.replace('\\r', '\r').replace('\\t', '\t')
- escaped = escaped.replace('\\0', '\0').replace('\\\\', '\\')
- return escaped
- def Execute(self, query, writethru=True):
- """Parse and execute the query.
- Args:
- query: e.g. 'select * from table;'
- writethru: bool
- Returns:
- dataset: [{col: val, col: val}, {col: val}, {col: val}]
- Raises:
- ExecuteError: unable to execute query
- """
- # parse the query
- self.ParseQuery(query)
- # get lock and execute the query
- self.OpenTable()
- Debug('Running ' + self.command)
- dataset = []
- if self.command == 'SELECT':
- dataset = self._RunSelect()
- elif self.command == 'UPDATE':
- dataset = self._RunUpdate()
- elif self.command == 'INSERT':
- dataset = self._RunInsert()
- elif self.command == 'DELETE':
- dataset = self._RunDelete()
- if self.command != 'SELECT' and writethru:
- self.WriteTempTable()
- self.MoveTableIntoPlace()
- Debug(dataset)
- return dataset
- def CleanUp(self):
- """Reset the internal variables (for multiple queries)."""
- self.tokens = [] # query broken up into tokens
- self.conditions = {} # args to the WHERE clause
- self.columns = [] # columns requested by SELECT
- self.table = '' # name of the table being queried
- self.header = {} # table header
- self.orig_data = [] # original table data used to diff after updates
- self.data = [] # table data as a list of dicts
- self.count = False # where or not using COUNT()
- self.aliases = {} # column aliases (SELECT AS)
- self.targets = {} # target columns-value pairs for INSERT/UPDATE
- self.args = '' # query arguments preceeding the ;
- self.command = '' # which command are we executing
- self.strings = [] # list of string literals parsed from the query
- self.parens = [] # list of parentheses parsed from the query
- def ParseQuery(self, query):
- """External wrapper for the query parsing routines.
- Args:
- query: string
- Raises:
- ParseError: Unable to parse query
- """
- self.args = query.split(';')[0]
- self._Tokenize()
- self._ParseCommand()
- self._ParseOrderBy()
- self._ParseConditions()
- self._ParseColumns()
- self._ParseTable()
- self._ParseTargets()
- def _ParseCommand(self):
- """Determine the command: SELECT, UPDATE, DELETE or INSERT.
- Raises:
- ParseError: unable to parse command
- """
- self.command = self.tokens[0]
- # Check that command is valid
- if self.command not in self.ALL_COMMANDS:
- raise ParseError('Unsupported command: ' + self.command)
- self.tokens.pop(0)
- Debug('Command is: %s' % self.command)
- Debug('self.tokens: %s' % self.tokens)
- def _Tokenize(self):
- """Turn the string query into a list of tokens.
- Split on '(', ')', ' ', ';', '=' and ','.
- In addition capitalize any SQL keywords found.
- """
- # horrible hack to handle now()
- time_now = '%s' % int(time.time())
- time_now = time_now[0:-2] + '00' # round off the seconds for unittesting
- while 'now()' in self.args.lower():
- start = self.args.lower().find('now()')
- self.args = ('%s%s%s' % (self.args[0:start], time_now,
- self.args[start + 5:]))
- # pad token separators with spaces
- pad = self.args.replace('(', ' ( ').replace(')', ' ) ')
- pad = pad.replace(',', ' , ').replace(';', ' ; ').replace('=', ' = ')
- self.args = pad
- # parse out all the blocks (string literals and parens)
- self._ParseOutBlocks()
- # split remaining into tokens
- self.tokens = self.args.split()
- # now capitalize
- i = 0
- while i < len(self.tokens):
- if self.tokens[i].upper() in self.RESERVED_WORDS:
- self.tokens[i] = self.tokens[i].upper()
- i += 1
- Debug('Tokens: %s' % self.tokens)
- def _ParseOutBlocks(self):
- """Parse out string literals and parenthesized values."""
- self.strings = []
- self.parens = []
- # set str placeholder to a value that's not present in the string
- while self._str_placeholder in self.args:
- self._str_placeholder = '%s_' % self._str_placeholder
- # set paren placeholder to a value that's not present in the string
- while self._paren_placeholder in self.args:
- self._paren_placeholder = '%s_' % self._paren_placeholder
- self.strings = self._ParseOutHelper(self._str_placeholder, ["'", '"'],
- 'quotes')
- self.parens = self._ParseOutHelper(self._paren_placeholder, ['(', ')'],
- 'parens')
- Debug('Strings: %s' % self.strings)
- Debug('Parens: %s' % self.parens)
- def _ParseOutHelper(self, placeholder, delims, mode):
- """Replace all text within delims with placeholders.
- Args:
- placeholder: string
- delims: list of strings
- mode: string
- 'parens': if there are 2 delims treat the first as opening
- and second as closing, such as with ( and )
- 'quotes': treat each delim as either opening or
- closing and require the same one to terminate the block,
- such as with ' and "
- Returns:
- list: [value1, value2, ...]
- Raises:
- ParseError: unable to parse out delims
- ExecuteError: Invalid usage
- """
- if mode not in ['quotes', 'parens']:
- raise ExecuteError('_ParseOutHelper: invalid mode ' + mode)
- if mode == 'parens' and len(delims) != 2:
- raise ExecuteError('_ParseOutHelper: delims must have 2 values '
- 'in "parens" mode')
- values = []
- started = 0
- new_args = ''
- string = ''
- my_id = 0
- delim = ''
- for c in self.args:
- if c in delims:
- if not started:
- if mode == 'parens' and c != delims[0]:
- raise ParseError('Found closing delimeter %s before '
- 'corresponding %s' % (c, delims[0]))
- started += 1
- delim = c
- else:
- if ((mode == 'parens' and c == delim) or
- (mode == 'quotes' and c != delim)):
- string = '%s%s' % (string, c)
- continue # wait for matching delim
- started -= 1
- if not started:
- values.append(string)
- new_args = '%s %s' % (new_args, '%s%d' % (placeholder, my_id))
- my_id += 1
- string = ''
- else:
- if not started:
- new_args = '%s%s' % (new_args, c)
- else:
- string = '%s%s' % (string, c)
- if started:
- if mode == 'parens':
- waiting_for = delims[1]
- else:
- waiting_for = delim
- raise ParseError('Unterminated block, waiting for ' + waiting_for)
- self.args = new_args
- Debug('Values: %s' % values)
- return values
- def _ReplaceStringLiterals(self, s, quotes=False):
- """Replaces string placeholders with real values.
- If quotes is set to True surround the returned value with single quotes
- Args:
- s: string
- quotes: bool
- Returns:
- s: string
- """
- if s.strip().startswith(self._str_placeholder):
- str_index = int(s.split(self._str_placeholder)[1])
- s = self.strings[str_index]
- if quotes:
- s = "'" + s + "'"
- return s
- def _ReplaceParens(self, s):
- """Replaces paren placeholders with real values.
- Args:
- s: string
- Returns:
- s: string
- """
- if s.strip().startswith(self._paren_placeholder):
- str_index = int(s.split(self._paren_placeholder)[1])
- s = self.parens[str_index].strip()
- return s
- def _RunDelete(self):
- """Run the DELETE command.
- Go through the rows in self.data matching them
- against the conditions, if they fit delete the row leaving a placeholder
- value (in order to keep the iteration process sane). Afterward clean up
- any empty values.
- Returns:
- dataset: [number of affected rows]
- """
- i = 0
- length = len(self.data)
- affected = 0
- while i < length:
- if self._MatchRow(self.data[i]):
- self.data[i] = None
- affected += 1
- i += 1
- # clean out the placeholders
- while None in self.data:
- self.data.remove(None)
- return [affected]
- def _RunUpdate(self):
- """Run the UPDATE command.
- Find the matching rows and update based on self.targets
- Returns:
- affected: [int]
- Raises:
- ExecuteError: failed to run UPDATE
- """
- i = 0
- length = len(self.data)
- affected = 0
- while i < length:
- if self._MatchRow(self.data[i]):
- for target in self.targets:
- if target not in self.header:
- raise ExecuteError(target + ' is an invalid column name')
- if self.header[target]['auto']:
- raise ExecuteError(target + ' is type auto and connot be updated')
- self.data[i][target] = self._TypeCheck(self.targets[target], target)
- affected += 1
- i += 1
- return [affected]
- def _RunInsert(self):
- """Run the INSERT command.
- Build up the row based on self.targets and table defaults, then append to
- self.data
- Returns:
- affected: [int]
- Raises:
- ExecuteError: failed to run INSERT
- """
- new_row = {}
- cols = self._SortHeaderColumns()
- for col in cols:
- if col in self.targets:
- if self.header[col]['auto']:
- raise ExecuteError(col + ' is type auto: cannot be modified')
- new_row[col] = self.targets[col]
- elif self.header[col]['null']:
- new_row[col] = ''
- elif self.header[col]['auto']:
- new_row[col] = self._GetNextAuto(col)
- else:
- raise ExecuteError(col + ' cannot be empty or null')
- self.data.append(new_row)
- return [1]
- def _GetNextAuto(self, col):
- """Figure out the next value for col based on existing values.
- Scan all the current values and return the highest one + 1.
- Args:
- col: string
- Returns:
- next: int
- Raises:
- ExecuteError: Failed to get auto inc
- """
- highest = 0
- seen = []
- for row in self.data:
- if row[col] > highest:
- highest = row[col]
- if row[col] not in seen:
- seen.append(row[col])
- else:
- raise ExecuteError('duplicate value %s in %s' % (row[col], col))
- return highest + 1
- def _RunSelect(self):
- """Run the SELECT command.
- Returns:
- dataset: []
- Raises:
- ExecuteError: failed to run SELECT
- """
- dataset = []
- if ['*'] == self.columns:
- self.columns = self._SortHeaderColumns()
- for row in self.data:
- if self._MatchRow(row):
- match = []
- for col in self.columns:
- if col in self.aliases:
- concat = ''
- for concat_col in self.aliases[col]:
- if concat_col.startswith("'") and concat_col.endswith("'"):
- concat += concat_col.strip("'")
- elif concat_col not in self.header.keys():
- raise ExecuteError('Table %s does not have a column %s' %
- (self.table, concat_col))
- else:
- concat = '%s%s' % (concat, row[concat_col])
- if not concat.strip():
- raise ExecuteError('Empty CONCAT statement')
- my_match = concat
- elif col.startswith("'") and col.endswith("'"):
- my_match = col.strip("'")
- elif col not in self.header.keys():
- raise ExecuteError('Table %s does not have a column %s' %
- (self.table, col))
- else:
- my_match = row[col]
- match.append(self._UnEscapeChars(my_match))
- dataset.append(match)
- if self.count:
- Debug('Dataset: %s' % dataset)
- dataset = [len(dataset)]
- if self.order_by:
- if self.order_by not in self.header.keys():
- raise ExecuteError('Unknown column %s in ORDER BY clause' %
- self.order_by)
- pos = self._PositionByCol(self.order_by)
- dataset = self._SortMatrixByCol(dataset, pos)
- return dataset
- def _SortMatrixByCol(self, dataset, pos):
- """Sorts the matrix (array or arrays) based on a given column value.
- That is, if given matrix that looks like:
- [[1, 2, 3], [6, 5, 4], [3, 2, 1]]
- given pos = 0 produce:
- [[1, 2, 3], [3, 2, 1], [6, 5, 4]]
- given pos = 1 produce:
- [[1, 2, 3], [3, 2, 1], [6, 5, 4]]
- given pos = 2 produce:
- [[3, 2, 1], [1, 2, 3], [6, 5, 4]]
- Works for both integer and string values of column.
- Args:
- dataset: [[], [], ...]
- pos: int
- Returns:
- sorted: [[], [], ...]
- """
- # prepend value in pos to the beginning of every row
- i = 0
- while i < len(dataset):
- dataset[i].insert(0, dataset[i][pos])
- i += 1
- # sort the matrix, which is done on the row we just prepended
- dataset.sort()
- # strip away the first value
- i = 0
- while i < len(dataset):
- dataset[i].pop(0)
- i += 1
- return dataset
- def _MatchRow(self, row):
- """Matches the row against self.conditions.
- Args:
- row: ['val', 'val']
- Returns:
- Bool
- """
- match = True
- # when there are no conditions we match everything
- if not self.conditions:
- return match
- for condition in self.conditions:
- cond_val = self.conditions[condition]
- if condition not in self.header.keys():
- match = False
- break
- else:
- if cond_val != row[condition]:
- match = False
- break
- return match
- def _ProcessHeader(self):
- """Parse out the header information.
- Returns:
- {col_name: {'type': string, 'null': string, 'auto': string, 'pos': int}}
- """
- header = self.fd.readline().strip()
- cols = {}
- pos = 0
- for col in header.split():
- col_name = col.split('(')[0]
- col_type = col.split('(')[1].split(')')[0].split(',')[0]
- col_null = False
- col_auto = False
- if ',' in col.split('(')[1].split(')')[0]:
- if col.split('(')[1].split(')')[0].split(',')[1].lower() == 'null':
- col_null = True
- if col.split('(')[1].split(')')[0].split(',')[1].lower() == 'auto':
- col_auto = True
- cols[col_name] = {}
- cols[col_name]['type'] = col_type
- cols[col_name]['null'] = col_null
- cols[col_name]['auto'] = col_auto
- cols[col_name]['pos'] = pos
- pos += 1
- return cols
- def _GetData(self):
- """Reads table data into memory as a list of dicts keyed on column names.
- Returns:
- data: [{row}, {row}, ...]
- Raises:
- ExecuteError: failed to get data
- """
- data = []
- row_num = 0
- for row in self.fd:
- row = row.rstrip('\n')
- row_dict = {}
- i = 0
- field_start = 0
- field_num = 0
- while i < len(row):
- if row[i] == ':':
- # the following block is executed again after the while is done
- val = row[field_start:i]
- col = self._ColByPosition(field_num)
- val = self._TypeCheck(val, col)
- row_dict[col] = val
- field_start = i + 1 # skip the colon itself
- field_num += 1
- if row[i] == '\\':
- i += 2 # skip the next char since it's escaped
- else:
- i += 1
- # handle the last field since we won't hit a : at the end
- # sucks to duplicate the code outside the loop but I can't think
- # of a better way :(
- val = row[field_start:i]
- col = self._ColByPosition(field_num)
- val = self._TypeCheck(val, col)
- row_dict[col] = val
- # verify that all columns were created
- for col in self.header:
- if col not in row_dict:
- raise ExecuteError('%s is missing from row %d in %s' %
- (col, row_num, self.table))
- row_num += 1
- data.append(row_dict)
- return data
- def _TypeCheck(self, val, col):
- """Verify type of val based on the header.
- Make sure the value is returned in quotes if it's a string
- and as '' when it's empty and Null
- Args:
- val: string
- col: string
- Returns:
- val: string
- Raises:
- ExecuteError: invalid value or column
- """
- if not val and not self.header[col]['null']:
- raise ExecuteError(col + ' cannot be empty or null')
- if (self.header[col]['type'].lower() == 'int' or
- self.header[col]['type'].lower() == 'double'):
- try:
- if val:
- val = eval(val)
- except NameError, e:
- raise ExecuteError('Failed to parse %s in %s '
- '(unable to convert to type %s): %s' %
- (col, self.table, self.header[col]['type'], e))
- except SyntaxError, e:
- raise ExecuteError('Failed to parse %s in %s '
- '(unable to convert to type %s): %s' %
- (col, self.table, self.header[col]['type'], e))
- return val
- def _ColByPosition(self, pos):
- """Returns column name based on position.
- Args:
- pos: int
- Returns:
- column: string
- Raises:
- ExecuteError: invalid column
- """
- for col in self.header:
- if self.header[col]['pos'] == pos:
- return col
- raise ExecuteError('Header does not contain column %d' % pos)
- def _PositionByCol(self, col):
- """Returns position of the column based on the name.
- Args:
- col: string
- Returns:
- pos: int
- Raises:
- ExecuteError: invalid column
- """
- if col not in self.header.keys():
- raise ExecuteError(col + ' is not a valid column name')
- return self.header[col]['pos']
- def _SortHeaderColumns(self):
- """Sort column names by position.
- Returns:
- sorted: [col1, col2, ...]
- Raises:
- ExecuteError: unable to sort header
- """
- cols = self.header.keys()
- sorted_cols = [''] * len(cols)
- for col in cols:
- pos = self.header[col]['pos']
- sorted_cols[pos] = col
- if '' in sorted_cols:
- raise ExecuteError('Unable to sort header columns: %s' % cols)
- return sorted_cols
- def OpenTable(self):
- """Opens the table file and places its content into memory.
- Raises:
- ExecuteError: unable to open table
- """
- # if we already have a header assume multiple queries on same table
- # (can't use self.data in case the table was empty to begin with)
- if self.header:
- return
- try:
- self.fd = open(os.path.join(self.location, self.table), 'r')
- self.header = self._ProcessHeader()
- if self.command in ['INSERT', 'DELETE', 'UPDATE']:
- fcntl.flock(self.fd, fcntl.LOCK_EX)
- self.data = self._GetData()
- self.orig_data = self.data[:] # save a copy of the data before modifying
- except IOError, e:
- raise ExecuteError('Unable to open table %s: %s' % (self.table, e))
- Debug('Header is: %s' % self.header)
- # type check the conditions
- for cond in self.conditions:
- if cond not in self.header.keys():
- raise ExecuteError('unknown column %s in WHERE clause' % cond)
- self.conditions[cond] = self._TypeCheck(self.conditions[cond], cond)
- # type check the targets
- for target in self.targets:
- if target not in self.header.keys():
- raise ExecuteError('unknown column in targets: %s' % target)
- self.targets[target] = self._TypeCheck(self.targets[target], target)
- Debug('Type checked conditions: %s' % self.conditions)
- Debug('Data is:')
- for row in self.data:
- Debug('=======================')
- Debug(row)
- Debug('=======================')
- def WriteTempTable(self):
- """Write table header and data.
- First write header and data to a temp file,
- then move the tmp file to replace the original table file.
- """
- self.temp_file = tempfile.NamedTemporaryFile()
- Debug('temp_file: ' + self.temp_file.name)
- # write header
- columns = self._SortHeaderColumns()
- header = ''
- for col in columns:
- header = '%s %s' % (header, col)
- header = '%s(%s' % (header, self.header[col]['type'])
- if self.header[col]['null']:
- header = '%s,null)' % header
- elif self.header[col]['auto']:
- header = '%s,auto)' % header
- else:
- header = '%s)' % header
- self.temp_file.write(header.strip() + '\n')
- # write data
- for row in self.data:
- row_str = ''
- for col in columns:
- row_str = '%s:%s' % (row_str, row[col])
- self.temp_file.write(row_str[1:] + '\n')
- self.temp_file.flush()
- def MoveTableIntoPlace(self):
- """Replace the real table with the temp one.
- Diff the new data against the original and replace the table when they are
- different.
- """
- if self.data != self.orig_data:
- temp_file = self.temp_file.name
- table_file = os.path.join(self.location, self.table)
- Debug('Copying %s to %s' % (temp_file, table_file))
- shutil.copy(self.temp_file.name, self.location + '/' + self.table)
- def _ParsePairs(self, s, delimeter):
- """Parses out name value pairs from a string.
- String contains name=value pairs
- separated by a delimiter (such as "and" or ",")
- Args:
- s: string
- delimeter: string
- Returns:
- my_dict: dictionary
- Raises:
- ParseError: unable to parse pairs
- """
- my_dict = {}
- Debug('parse pairs: [%s]' % s)
- pairs = s.split(delimeter)
- for pair in pairs:
- if '=' not in pair:
- raise ParseError('Invalid condition pair: ' + pair)
- split = pair.split('=')
- Debug('split: %s' % split)
- if len(split) != 2:
- raise ParseError('Invalid condition pair: ' + pair)
- col = split[0].strip()
- if not col or not split[1].strip() or ' ' in col:
- raise ParseError('Invalid condition pair: ' + pair)
- val = self._ReplaceStringLiterals(split[1].strip())
- my_dict[col] = val
- return my_dict
- class Error(Exception):
- """DBText error."""
- class ParseError(Error):
- """Parse error."""
- class NotSupportedError(Error):
- """Not Supported error."""
- class ExecuteError(Error):
- """Execute error."""
- def main(argv):
- if len(argv) < 2:
- print 'Usage %s query' % argv[0]
- sys.exit(1)
- if 'DBTEXT_PATH' not in os.environ or not os.environ['DBTEXT_PATH']:
- print 'DBTEXT_PATH must be set'
- sys.exit(1)
- else:
- location = os.environ['DBTEXT_PATH']
- try:
- conn = DBText(location)
- dataset = conn.Execute(' '.join(argv[1:]))
- if dataset:
- for row in dataset:
- if conn.command != 'SELECT':
- print 'Updated %s, rows affected: %d' % (conn.table, row)
- else:
- print row
- except Error, e:
- print e
- sys.exit(1)
- if __name__ == '__main__':
- main(sys.argv)
|