#! /usr/bin/env python

#This script creates a hadoop streaming command from a list of inputs, an output, cacheArchives, cacheFiles, jobconf settings, a mapper and a reducer.

#The tool command is the original tool command to run.
#The mapper, reducer and output are strings;
#the remaining arguments are lists.

#Input will be uploaded using the XML Server
def createCommand(inputs, output,mapper,reducer, filesPack=None,cacheArchives=None,jobconf=None,cacheFiles=None):
	"""Build the hadoop streaming command line as a single string.

	output, mapper and reducer are strings placed in the fixed head of the
	command. inputs is a list of paths, each emitted as '-input <path> '.
	filesPack, cacheArchives, cacheFiles and jobconf are optional lists
	emitted, in that order, as '-file', '-cacheArchive', '-cacheFile' and
	'-jobconf' options; a list left as None contributes nothing.
	Every option, including the last one, is followed by one space.
	"""
	head = "hadoop jar /home/hadoop/contrib/streaming/hadoop-streaming.jar -output %s -mapper %s -reducer %s " %(output,mapper,reducer)
	pieces = [head]
	#inputs is mandatory, so it is walked without a None check.
	pieces.extend("-input " + path + " " for path in inputs)
	#Optional groups, listed in the order they appear on the command line.
	optionalGroups = (
		("-file ", filesPack),
		("-cacheArchive ", cacheArchives),
		("-cacheFile ", cacheFiles),
		("-jobconf ", jobconf),
	)
	for flag, values in optionalGroups:
		if values is None:
			continue
		pieces.extend(flag + value + " " for value in values)
	return "".join(pieces)

def distCp(src,dst):
	"""Build hadoop distcp command(s) copying src to dst.

	src -- a single path string, or a list of path strings.
	dst -- the destination path string, or a list of destinations with the
	       same length as src (paired with src by position).

	Returns one command string when src is a string, otherwise a list with
	one command string per (src, dst) pair.
	Raises ValueError when src and dst are lists of different lengths.
	"""
	if isinstance(src, str): #SingleFile
		return "/home/hadoop/bin/hadoop distcp " + src + " " + dst
	#Lists
	if len(src) != len(dst):
		raise ValueError("SRC List must be the same length of DST List")
	#NOTE(review): the list form calls plain "hadoop" while the single-file form
	#uses the absolute /home/hadoop/bin/hadoop path - confirm which is intended.
	return ["hadoop distcp %s %s" % (source, target) for source, target in zip(src, dst)]

def createMapperScript(toolCommand,inputs,outputs):
	"""Return the source text of a streaming mapper script that runs toolCommand.

	Every path of inputs found in toolCommand is replaced by
	'QueryFile<N>.fasta' (N counts from 1 in list order) and every path of
	outputs by 'outputs/<basename>'. The generated script (Python 2 syntax)
	writes its stdin lines, split on "||", to QueryFile1.fasta, runs the
	rewritten command through the shell, and then prints either every file
	found in 'outputs/' or the text captured from the command.

	toolCommand -- command line string; it is embedded between single quotes
	               in the generated script.
	inputs      -- list of input paths as they appear in toolCommand.
	outputs     -- list of output paths as they appear in toolCommand; None
	               or an empty list means no output files are read back.
	"""
	#Point the command at the file names the mapper uses in its working directory.
	for index, path in enumerate(inputs, 1):
		toolCommand = toolCommand.replace(path,'QueryFile' + str(index)+'.fasta')
	#outputs may be None: treat it like an empty list instead of failing on len(None).
	for path in outputs or []:
		toolCommand = toolCommand.replace(path,'outputs/' + path.split('/')[-1])

	st = """#!/usr/bin/env python
import os,subprocess,sys
def run(executer):
        #p =run (executer)
        PIPE=subprocess.PIPE
        p=subprocess.Popen(executer,stdout=PIPE,stderr=PIPE,shell=True)
        p.wait()
        st=p.stderr.read()
        if len(st)>0:
                return "Err:" + st

        else:
                return str(p.stdout.read()).strip()
"""
	#The generated script only creates and reads 'outputs/' when output files were declared.
	if outputs:
		st+="""
os.makedirs('outputs')
readOutputFiles=True
	
		"""
	else:
		st+="""
readOutputFiles=False
		"""

	#The %s below is the only substitution point: it receives the rewritten command.
	st+="""
line = sys.stdin.readline()
i=1
try:
	fname="QueryFile1.fasta"
        f=open(fname,'w')
        while line:
		if (line==""): break
		s=line.split("||")
                f.write(">"+s[0]+"\\n"+s[1])
                line = sys.stdin.readline()
	f.flush()
	f.close()
	result= run('%s')
	if (readOutputFiles):
		files=os.listdir('outputs')
		for file in files:
			f=open('outputs/'+file,'r')
			t=f.read()
			print file #+'\\n'
			print t
		#	print "\\n--file--\\n"
			print "--file--"
			f.close()
		sys.exit(0)
			
	if (result==""):
		print "No Result"
	else: 
		print result	
except "end of file":
        pass
""" %(toolCommand)
	return st

if __name__ == "__main__":
	#Demo run: print a generated mapper script, then a sample streaming command.
	#createMapperScript requires inputs and outputs; the sample tool command names
	#no files to rewrite, so both are passed as empty lists.
	print(createMapperScript('"./blastn -query " + fname + " -db nt/nt -outfmt 6"', [], []))
	print(createCommand(['s3://eg.nubios.us/emr/query.formatted'],"s3://eg.nubios.us/emr/output/ex1","s3://eg.nubios.us/emr/nt2.py","NONE",cacheArchives=['s3://eg.nubios.us/nt8/0.tar.gz#nt'], filesPack=['s3://eg.nubios.us/emr/blastn'], cacheFiles=['s3://eg.nubios.us/hello'],jobconf=['mapred.map.tasks=8','mapred.reduce.tasks=1']))


