You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ambar2/Pipeline/containerprocessors/pstprocessor.py

167 lines
6.5 KiB
Python

from model import AmbarFileMeta, AmbarFileContent
from datetime import datetime
from hashlib import sha256
from subprocess import call
from os import walk, path
import hashlib
import re
import io
class PstProcessor():
def __init__(self, Logger, ApiProxy):
self.logger = Logger
self.apiProxy = ApiProxy
self.tempPath = '/pst-temp'
self.pstFileName = 'archive.pst'
def CleanUpTemp(self):
retcode = -1
try:
retcode = call('rm -rf {0}/*'.format(self.tempPath), shell=True)
if retcode != 0:
self.logger.LogMessage('info', 'error cleaning temp dir, code: {0}'.format(retcode))
return False
except Exception as e:
self.logger.LogMessage('info', 'error cleaning temp dir')
return False
return True
def ExtractPstArchive(self):
retcode = -1
cmd = 'readpst -o {0} -D -j 1 -r -tea -u -w -e {0}/{1}'.format(self.tempPath, self.pstFileName)
try:
retcode = call(cmd, shell=True)
if retcode != 0:
self.logger.LogMessage('info', 'error extracting pst, code: {0}'.format(retcode))
return False
except Exception as e:
self.logger.LogMessage('error', 'error extracting pst {0}'.format(repr(e)))
return False
return True
def WriteFileData(self, FileData):
try:
f = open('{0}/{1}'.format(self.tempPath, self.pstFileName), 'wb')
f.write(FileData)
f.close()
except Exception as e:
self.logger.LogMessage('error', 'error writing file {0}'.format(repr(e)))
return False
return True
def ReadFileData(self, FilePath):
try:
f = open(FilePath, 'rb')
fileData = f.read()
f.close()
return fileData
except Exception as e:
self.logger.LogMessage('error', 'error reading file {0} {1}'.format(FilePath, e))
return None
def Process(self, FileData, FileMeta, SourceId):
self.logger.LogMessage('verbose', 'processing pst archive {0}'.format(FileMeta.full_name))
try:
if not self.CleanUpTemp():
return
if not self.WriteFileData(FileData):
return
if not self.ExtractPstArchive():
return
for (dirpath, dirnames, filenames) in walk(self.tempPath):
for fileName in filenames:
self.logger.LogMessage('verbose', 'enqueuing file {0} from pst archive {1}'.format(fileName, FileMeta.full_name))
fullNameInArchive = '{0}{1}'.format(FileMeta.full_name, path.join(dirpath.replace(self.tempPath,''), fileName))
fullNameInFs = path.join(dirpath, fileName)
fileData = self.ReadFileData(fullNameInFs)
if not fileData:
continue
sha = sha256(fileData).hexdigest()
size = len(fileData)
if size == 0:
continue
# checking content existance
apiResp = self.apiProxy.CheckIfParsedAmbarFileContentExists(sha)
if not apiResp.Success:
self.logger.LogMessage('error', 'error checking content existance {0} {1}'.format(fullNameInArchive, apiResp.message))
continue
if not (apiResp.Found or apiResp.NotFound):
self.logger.LogMessage('error', 'unexpected response on checking content existance {0} {1} {2}'.format(fullNameInArchive, apiResp.code, apiResp.message))
continue
if apiResp.NotFound:
self.logger.LogMessage(
'verbose', 'content not found {0}'.format(fullNameInArchive))
# creating content
createContentApiResp = self.apiProxy.CreateAmbarFileContent(fileData, sha)
if not createContentApiResp.Success:
self.logger.LogMessage('error', 'error creating content {0} {1}'.format(fullNameInArchive, createContentApiResp.message))
continue
if not (createContentApiResp.Found or createContentApiResp.Created):
self.logger.LogMessage('error', 'unexpected response on create content {0} {1} {2}'.format(fullNameInArchive, createContentApiResp.code, createContentApiResp.message))
continue
if createContentApiResp.Found:
self.logger.LogMessage('verbose', 'content found {0}'.format(fullNameInArchive))
if createContentApiResp.Created:
self.logger.LogMessage('verbose', 'content created {0}'.format(fullNameInArchive))
if apiResp.Found:
self.logger.LogMessage('verbose', 'content found {0}'.format(fullNameInArchive))
# sending meta back to queue
fileMeta = AmbarFileMeta.InitWithoutId(FileMeta.created_datetime, FileMeta.updated_datetime, fileName, fullNameInArchive, FileMeta.source_id)
apiResp = self.apiProxy.EnqueueAmbarFileMeta(
fileMeta, sha, SourceId)
if not apiResp.Success:
self.logger.LogMessage('error', 'error adding meta {0} {1}'.format(
fileMeta.full_name, apiResp.message))
continue
if apiResp.BadRequest:
self.logger.LogMessage('verbose', 'bad meta, ignoring... {0}'.format(fileMeta.full_name))
continue
if apiResp.InsufficientStorage:
self.logger.LogMessage('verbose', 'insufficient storage'.format(fileMeta.full_name))
continue
if not apiResp.Ok:
self.logger.LogMessage('error', 'unexpected response on adding meta {0} {1} {2}'.format(fileMeta.full_name, apiResp.code, apiResp.message))
continue
self.logger.LogMessage('verbose', 'meta added {0}'.format(fileMeta.full_name))
self.CleanUpTemp()
except Exception as ex:
self.logger.LogMessage(
'info', 'unable to unpack {0} {1}'.format(FileMeta.full_name, ex))