001: #!/usr/bin/python
002: """
003: Common code between ale and cask.
004: """
005: 
006: import errno
007: import hashlib
008: import os
009: import sqlite3
010: import stat
011: import sys
012: 
013: 
014: def stdout(msg, *args):
015:   if args:
016:     msg = msg % args
017:   print msg
018: 
019: 
def stderr(msg, *args):
  """Print a line to standard error.

  Args:
    msg: text to print.  It is used as a %-format string only when args are
      given; otherwise it is printed verbatim (so a literal '%' is safe).
    *args: optional values interpolated into msg with the % operator
  """
  text = msg % args if args else msg
  print >>sys.stderr, text
024: 
025: 
def MakeDir(path):
  """Create the directory `path`, tolerating that it already exists.

  Only the last path component is created (os.mkdir, not os.makedirs).

  Args:
    path: directory to create

  Raises:
    OSError: if the directory can't be created for any reason other than
      already being there.  This includes the case where `path` exists but
      is not a directory.
  """
  try:
    os.mkdir(path)
  except OSError as e:
    # mkdir reports EEXIST for ANY existing entry, e.g. a regular file.  Only
    # an existing directory means the caller got what it asked for.
    if e.errno != errno.EEXIST or not os.path.isdir(path):
      raise
032: 
033: 
class SqliteCursor(object):
  """Context manager that gets a cursor from a DB name.

  Entering opens a connection to db_name, issues BEGIN TRANSACTION, and
  returns the cursor.  Leaving issues END TRANSACTION (a commit) and closes
  the cursor and the connection.  The commit is attempted whether or not the
  body raised; nothing is rolled back here.
  """

  # NOTE: The sqlite3 module already has a context manager, but it's not what
  # we want.  (It's about transaction commit/rollback, which seems wrong.)

  def __init__(self, db_name, create=False):
    """
    Args:
      db_name: path of the database file, passed to sqlite3.connect()
      create: If true, a missing file is allowed and sqlite3.connect() will
        create it.  If false, a missing file makes __enter__ raise.
    """
    self.db_name = db_name
    self.create = create

  def __enter__(self):
    """Open the connection, begin a transaction, and return the cursor.

    Raises:
      RuntimeError: if create is false and db_name is not an existing file
    """
    # We never want connect() to create an empty db.  The .sqlite3 files are
    # explicitly created with 'ale init' or 'cask init'.
    #
    # TODO: It is correct to have a lock around the repo, so we don't have a
    # race here.

    if not self.create and not os.path.isfile(self.db_name):
      raise RuntimeError("%s doesn't exist" % self.db_name)

    # isolation_level=None stops the sqlite3 module from implicitly opening
    # and committing transactions (which it does by inspecting the SQL
    # statements).  We issue BEGIN / END explicitly instead.
    self.conn = sqlite3.connect(self.db_name, isolation_level=None)

    try:
      # Filenames on ext4 are BYTE strings (utf-8 encoded)
      self.conn.text_factory = str

      self.cursor = self.conn.cursor()
      self.cursor.execute('BEGIN TRANSACTION')
    except Exception:
      # __exit__ is not called when __enter__ fails, so don't leak the
      # connection.
      self.conn.close()
      raise
    return self.cursor

  def __exit__(self, type, value, traceback):
    """Commit, then close the cursor and the connection.

    Returns None, so an exception raised in the body propagates.
    """
    try:
      self.cursor.execute('END TRANSACTION')  # alias for commit
      self.cursor.close()

      # It's confusing that this is on the connection and not the cursor!
      self.conn.commit()
    finally:
      # END TRANSACTION raises if no transaction is active any more (e.g. the
      # body already committed).  Close the connection regardless.
      self.conn.close()
075: 
076: 
def WalkTree(prefix, current_dir, handler, skip_func=None):
  """Recursively walk a directory tree, calling back for each regular file.

  Entries of each directory are visited in sorted order.  Names starting with
  '.' are skipped, as are names rejected by skip_func.  Symlinks are not
  followed; they are skipped with a message on stderr.

  Args:
    prefix: root directory
    current_dir: directory to list, joined onto prefix (recursive arg)
    handler: object whose OnFile(full_path, rel_path, lstat) method is called
      back for each regular file
    skip_func: optional callable that takes an entry's base name; a true
      result skips that entry (and, for a directory, everything under it)

  Returns:
    A (file_count, total_bytes) tuple: the number of regular files passed to
    the handler, and the sum of their st_size, for this directory and all
    directories below it.

  Raises:
    RuntimeError: if a visited name contains a newline, or an entry is not a
      symlink, regular file, or directory.
  """
  file_count = 0
  total_bytes = 0

  full_dir = os.path.join(prefix, current_dir)
  entries = sorted(os.listdir(full_dir))

  # TODO: Use --progress flag to show progress.
  for name in entries:
    # Skip dot files, ESPECIALLY .ale!
    if name.startswith('.'):
      continue
    if skip_func and skip_func(name):
      continue

    rel_path = os.path.join(current_dir, name)
    full_path = os.path.join(prefix, rel_path)
    # lstat, not stat: we want to see the symlink itself, not its target.
    lstat = os.lstat(full_path)

    mode = lstat.st_mode

    # This would break the parsing
    if '\n' in name:
      raise RuntimeError("Newlines aren't allowed in filenames (%r)" % name)

    if stat.S_ISLNK(mode):  # symlink
      stderr('Skipping symlink %r', rel_path)

    elif stat.S_ISREG(mode):  # file
      handler.OnFile(full_path, rel_path, lstat)

      file_count += 1
      total_bytes += lstat.st_size

    elif stat.S_ISDIR(mode):  # directory
      # recurse, and fold the subtree's totals into ours
      f, t = WalkTree(prefix, rel_path, handler, skip_func=skip_func)
      file_count += f
      total_bytes += t

    else:
      raise RuntimeError("Can't serialize %r, of type %o" % (name, mode))

  return file_count, total_bytes
133: 
134: 
class FancyPrinter(object):
  """Shows checksum progress on stdout, rewriting a single line in place.

  Each update ends in '\\r' rather than a newline, so the next update
  overwrites it; OnChecksumDone() finally emits the newline.
  """

  def __init__(self):
    # Size passed to the most recent OnChecksumBegin().  Initialized here so
    # the object is fully formed even if OnChecksumBegin() is never called.
    self.last_num_bytes = 0

  def OnChecksumBegin(self, rel_path, num_bytes):
    """Called when a file is starting to be checksummed.

    Args:
      rel_path: repo path of the file
      num_bytes: total size of the file that WILL be checksummed
    """
    sys.stdout.write('%10s MB  %s\r' % ('0.0', rel_path))
    self.last_num_bytes = num_bytes

  def OnChecksumProgress(self, num_bytes):
    """Called after you have checksummed a certain number of bytes.

    Args:
      num_bytes: number of bytes checksummed so far; shown in MB (1e6 bytes)
    """
    # TODO:
    # - Show seconds, and MB/s ?
    # - Show "N of M MB", using self.last_num_bytes as the total?

    megabytes = '%.1f' % (float(num_bytes) / 1e6)
    # Only the size column is rewritten; the path that OnChecksumBegin()
    # printed after it is left in place.
    sys.stdout.write('%10s MB\r' % megabytes)  # progress bar

    # TODO: Write it only on some chunks?  Flush stdout here?

  def OnChecksumDone(self):
    """Called when the file is finished; ends the progress line."""
    sys.stdout.write('\n')
166: 
167: 
# 1 MiB blocks (but report progress in MB -- powers of 10)
CHUNK_SIZE = 1024 * 1024

def ChecksumFile(f, printer, chunk_size=CHUNK_SIZE):
  """Compute the SHA-1 of a file object, reporting progress as it goes.

  Args:
    f: file-like object to read from its current position until EOF.  Its
      read() results are fed to hashlib, so it should yield byte strings.
    printer: object with OnChecksumProgress(num_bytes), called after each
      chunk with the running byte count, and OnChecksumDone(), called once
      at the end (also for an empty file).
    chunk_size: number of bytes to request per read() call

  Returns:
    The raw (binary, not hex) SHA-1 digest of the data read.
  """
  h = hashlib.sha1()
  num_bytes = 0
  while True:
    chunk = f.read(chunk_size)
    if not chunk:  # EOF
      break
    h.update(chunk)
    num_bytes += len(chunk)
    printer.OnChecksumProgress(num_bytes)

  printer.OnChecksumDone()
  return h.digest()
185: