#! /usr/bin/env python
# This is responsible script for running emr
import subprocess,sys,os,simplejson,time,config
# Path to Amazon's elastic-mapreduce CLI directory, read from the project-level config module.
EMRPath=config.emrcli
# Absolute directory containing this script; used to locate the bundled "Client" package below.
mainPath=os.path.dirname(os.path.abspath( __file__ ))+'/'
import hadoop
sys.path.append(mainPath+"Client")
import CommandClient as Client
# ---------------------------------------------------------------------------
# Module-level defaults for the command-line options parsed in __main__.
# Each of these may be overridden by an "option=value" argument.
# ---------------------------------------------------------------------------
options=""            # extra raw options appended to the cluster-create command
domain=""             # master node DNS name of an already-running cluster
owner="utils"         # job owner name passed to the Client API
id="1"                # job/cluster identifier (NOTE: shadows the builtin id())
command=""            # command line to execute on the cluster
inputFiles=None       # list of input files, or None when not supplied
outputFiles=None      # list of expected output files, or None when not supplied
outDir="/home/hadoop/output/"   # default output directory on the cluster
hadoopMode='regular'  # 'regular' runs via Client directly; anything else uses streaming
FetchOutputFiles=True # whether runEMRJob should pull output files back
files=[]              # files to pack alongside the streaming job (-file)
conf=""               # extra jobconf key=value pairs
cacheFiles=[]         # Hadoop distributed-cache files
cacheArchives=[]      # Hadoop distributed-cache archives
configuration=""      # presumably reserved for future use -- not read anywhere in this file
reducer="NONE"        # reducer for the streaming job; "NONE" means map-only
emulate=False         # when True, print the cluster-create command instead of running it
count=1               # number of EC2 instances to start
params=""             # accumulated extra parameters for the create command
accessKey=""          # AWS access key (-a)
privateKey=""         # AWS private/secret key (-p)
insType="m1.small"    # EC2 instance type

def createCluster(insType,count,params,options):
		global privateKey,accessKey
		cmd=EMRPath+'elastic-mapreduce --create --ami-version 2.0 --alive --num-instances %s '%count
		if insType!="":
			cmd +="--instance-type %s "%insType
		cmd += "--bootstrap-action s3://eg.nubios.us/emrBoot.sh %s %s"%(params,options)
		if emulate:
			print cmd
			exit(0)
	#		raise Exception("stopped by developer")
		res = run (cmd)
		if 'j-' not in res:
			print res
			exit(2)
		id='j-'+res.split('j-')[1]
		
		#id='j-8HH0FOKNT62X'
		print "Started New Jobflow with id: " + id
		print "Waiting for cluster to start"
		while 1:
			r=run(EMRPath+'elastic-mapreduce --describe -a %s -p %s %s' %(accessKey,privateKey,id))
			#print r
			try:
				res= simplejson.loads (r)
			except: 
				print r
			status= res["JobFlows"][0]["ExecutionStatusDetail"]["State"]		
			#print status
			if status=="STARTING":
				print "Machines are still starting, will check again in 15 secs"
			
			elif status=="BOOTSTRAPPING":
				print "Machines Started, Running BootStrapping Script"
			elif status=="WAITING":
					print "MR cluster is ready"
					domain=res["JobFlows"][0]["Instances"]["MasterPublicDnsName"]
					print "JOB ID:" + id +" (required to terminate cluster)"
					print "domain: " +domain
					break
			elif status=="SHUTTING_DOWN":
				print "There is a problem while configuring bootstrap, machines are shutting down"
				exit(-1)
			time.sleep(15)

def submitEMRJob(domain,id,CommandLine,inputs,outputFiles,outputDir,reducer="NONE",PackFiles=None,Archives=None,Files=None,jobConf=None):
	"""Build a streaming mapper script for CommandLine and submit it via Client.

	Writes the generated mapper to /home/hadoop/scripts/<id>.py, builds the
	hadoop-streaming command with its output under hdfs:///home/hadoop/outputs/<id>,
	appends the mapper script to ``inputs`` so it is shipped with the job
	(NOTE: mutates the caller's list), and returns Client.submitJob's result
	(the job id).
	"""
	mapperSrc = hadoop.createMapperScript(CommandLine, inputs, outputFiles)
	mapFile = "/home/hadoop/scripts/" + str(id) + ".py"
	# "with" guarantees the script is flushed/closed before the job references it.
	with open(mapFile, 'w') as f:
		f.write(mapperSrc)
	cmd = hadoop.createCommand(inputs, "hdfs:///home/hadoop/outputs/" + str(id), mapFile, reducer, filesPack=PackFiles, cacheArchives=Archives, jobconf=jobConf, cacheFiles=Files)
	# The mapper script itself must be transferred along with the inputs.
	inputs.append(mapFile)
	return Client.submitJob(domain, str(id), "hadoop", cmd, inputs, [outputDir])




def runEMRJob(domain,id,CommandLine,inputs,outputFiles,outputDir,reducer="NONE",PackFiles=None,Archives=None,Files=None,jobConf=None):
	"""Build a streaming mapper script for CommandLine and run it via Client.

	Same as submitEMRJob, but the job output lands under hdfs://<outputDir>
	and the job is sent with Client.sendNewJob, honouring the module-level
	FetchOutputFiles flag. Appends the mapper script to ``inputs`` so it is
	shipped with the job (NOTE: mutates the caller's list).
	"""
	global FetchOutputFiles
	mapperSrc = hadoop.createMapperScript(CommandLine, inputs, outputFiles)
	mapFile = "/home/hadoop/scripts/" + str(id) + ".py"
	# "with" guarantees the script is flushed/closed before the job references it.
	with open(mapFile, 'w') as f:
		f.write(mapperSrc)
	cmd = hadoop.createCommand(inputs, "hdfs://" + outputDir, mapFile, reducer, filesPack=PackFiles, cacheArchives=Archives, jobconf=jobConf, cacheFiles=Files)
	# The mapper script itself must be transferred along with the inputs.
	inputs.append(mapFile)
	Client.sendNewJob(domain, str(id), "hadoop", cmd, inputs, [outputDir], FetchOutputFiles)
	
def run(excuter,Wait=True):
	"""Run a shell command.

	excuter -- the command line, executed through the shell
	Wait    -- when True (default), block until it finishes and return its
	           stripped stdout, or "Childerr:<stderr>" if anything was written
	           to stderr; when False, return the Popen object immediately.
	"""
	PIPE = subprocess.PIPE
	# universal_newlines=True keeps the output as text on both Python 2 and 3.
	p = subprocess.Popen(excuter, stdout=PIPE, stderr=PIPE, shell=True,
	                     universal_newlines=True)
	if not Wait:
		return p
	# communicate() drains both pipes concurrently; the original wait()-then-
	# read() pattern can deadlock when the child fills a pipe buffer.
	out, err = p.communicate()
	if len(err) > 0:
		return "Childerr:" + err
	return str(out).strip()
if __name__=="__main__":
	args=sys.argv[2:]
	for arg in args:
		if "--num-instances" in arg or "-n=" in arg:
			count =arg.split("=")[1]
		elif "--instance-type" in arg or "-t=" in arg:
			insType=arg.split("=")[1] 
		elif "-i=" in arg or "--install" in arg:
			lst=arg.split("=")[1].split(",")
			for file in lst:
				params+= "--bootstrap-action " + file +" " 
		elif "-a" in arg or "--acessKey" in arg:
			accessKey=arg.split("=")[1]
			params+="-a " + arg.split("=")[1]+" "
		elif "-p" in arg or "--privateKey" in arg:
			privateKey=arg.split("=")[1]
			params+="-p " + arg.split("=")[1]+" "
		elif "-kp" in arg or "--key-pair" in arg:
			params+="--key-pair " + arg.split("=")[1]+" "
		elif "-r" in arg or "--region" in arg:
			params+="--region " + arg.split("=")[1] +" "
		elif "-kf" in arg or "--key-pair-file" in arg:
			params+="--key-pair-file " + arg.split("=")[1] +" "
		elif "--name" in arg:
			params+=arg.split("=")[1]+" "
		elif "--options" in arg:
			params+=arg.split("=")[1] + " " 
		elif "--emulate" in arg:
			emulate=True
		elif "--domain" in arg or "-d=" in arg:
				domain=arg.split("=")[1]
		elif "--command" in arg:
			command=arg.split('=')[1]	
		elif "--mode" in arg or '-m=' in arg:
			hadoopMode=arg.split('=')[1]	
		elif '--input-files' in arg or '-i=' in arg:
			inputFiles=arg.split("=")[1].split(',')
		elif '--output-files' in arg or '-o=' in arg:
			outputFiles=arg.split("=")[1].split(',')
		elif '--no-fetch-output-files' in arg:
			FetchOutputFiles=False
		elif "-id" in arg:
			id=arg.split('=')[1]
		elif "--owner" in arg:
			owner=arg.split("=")[1]
		elif "--reducer" in arg:
			reducer=arg.split("=")[1]
		elif "--output-dir" in arg:
			outputDir=arg.split("=")[1]
		elif "--conf" in arg:
			conf=arg.split("=",1)[1].split(',')
		elif "--files" in arg:
			files=arg.split("=")[1].split(",")
		elif "--cache-archives" in arg or "-ca=" in arg:
			cacheArchives=arg.split("=")[1].split(',')
			
		elif "--cache-files" in arg or "-cf=" in arg:
			cacheFiles=arg.split("=")[1].split(',')
		else:
			print "unrecognized parameter:" , arg 
			exit(-2)

	mode=sys.argv[1]
	if EMRPath[-1]!="/": EMRPath+='/'
	if not os.path.exists(EMRPath+"elastic-mapreduce"):
		print "ERR: can't find the elastic-mapreduce client in '%s', make sure it exists there or reconfigure it in 'config.cfg'"%EMRPath
		exit(-1)

	if mode=="--create":
		createCluster(insType,count,params,options)
	elif mode=="--terminate":
		print run (EMRPath+'elastic-mapreduce --terminate '+sys.argv[2])
	 	
	elif mode=="--run" or mode=="--submit":
		if hadoopMode=="regular":
				print "Executing in a regular mode"
				#cmd=hadoop.distCp(src,ds)
				print "Executing " , command
				print Client.sendNewJob(domain,id,owner,command,inputs=inputFiles,outputs=outputFiles)
		else:
			if mode=="--run":
				runEMRJob(domain,id,command,inputFiles,outputFiles,outputDir,reducer,PackFiles=files,Archives=cacheArchives,Files=cacheFiles,jobConf=conf)
			else:
				print "Job ID:" + submitEMRJob(domain,id,command,inputFiles,outputFiles,outputDir,reducer,PackFiles=files,Archives=cacheArchives,Files=cacheFiles,jobConf=conf)
	elif mode=="--status":
		print Client.Job.checkStatus(domain,id,owner)
	elif mode=="--fetch-files":
		print Client.fetchOutputFiles(domain,outputFiles)
	elif mode=="--fetch-outputs":
		print outputDir
		files=Client.findOutputFiles(domain,id,owner,[outputDir])
		if files !=None:
			print files
			print Client.fetchOutputFiles(domain,id,owner,files,outputDir)
		
