#!/bin/bash

#######################################################################
#################### HEADER, CHANGE VALUES HERE #######################
#######################################################################
starters_file=""
read_sets="" # FOR instance: "read_set1.fa.gz read_set2.fq.gz"
prefix_index="index" # all files used for indexing will be written will start with this prefix
prefix_results="res" # all final files will be written will start with this prefix
extremities_filename="starter_extremities.fa"
k=31 # size of kmers
c=5 # minimal coverage
d=1 # estimated number of error per read
g=3000000000 # estimated genome size. Used only to control memory usage. e.g. 3 billion (3000000000) uses 4Gb of RAM.
t=0
f=1 #kind of process (search mode 1=Breadth and 2=Depth)
x=40 #node length limit
y=10000 #graph depth limit
PATH_TOOLS="" # path were executables mapsembler and phaser are. Leave blank if they are located in a directory located in the PATH environnement variable
#######################################################################
#################### END HEADER                 #######################
#######################################################################


function help {
echo "run_mapsembler_pipeline.sh, a pipelining mapsembler2_extremities, mapsembler2_extend and kissread_g"
echo "Usage: ./run_mapsembler_and_phaser.sh -s <starter.fasta> -r <reads.faste> -t [1/2/3/4]<options>"
echo -e "\t \t -s: file containing starters (fasta)"
echo -e "\t \t -r list of reads separated by space, surrounded by the '\"' character. Note that reads may be in fasta or fastq format, gzipped or not. Example: -r \"data_sample/reads_sequence1.fasta   data_sample/reads_sequence2.fasta.gz\"."
echo -e "\t \t -t: kind of assembly: 1=unitig (fasta), 2=contig (fasta), 3=unitig (graph), 4=contig(graph)"
echo -e "<options>:"
echo -e "\t\t -p prefix. All out files will start with this prefix. Example: -p my_prefix"
echo -e "\t\t -k value. Set the length of used kmers. Must fit the compiled value. Default=31. Example -k 31"
echo -e "\t\t -c value. Set the minimal coverage. Default=5. Example -c 5"
echo -e "\t\t -d value. Set the number of authorized substitutions used while mapping reads on found SNPs. Default=1. Example: -d 1"
echo -e "\t\t -g value. Estimated genome size. Used only to control memory usage. e.g. 3 billion (3000000000) uses 4Gb of RAM. Default=10 million. Example: -d 10000000"
echo -e "\t\t -f value. Set the process of search in the graph (1=Breadth  and 2=Depth). Default=1. Example: -f 1"
echo -e "\t\t -x value. Set the maximal nodes length . Default=40. Example: -x 40"
echo -e "\t\t -y value. Set the maximal graph depth . Default=10000. Example: -y 10000"
echo -e "\t\t -h Prints this message and exist"
echo "Any further question: read the readme file or contact us: pierre.peterlongo@inria.fr"
}


#######################################################################
#################### GET OPTIONS                #######################
#######################################################################
while getopts "s:r:t:p:k:c:d:g:f:x:y:h" opt; do
case $opt in
	h)
help
exit
;;
s)
echo "use starter file: $OPTARG" >&2
starters_file=$OPTARG
;;
r)
echo "use read set: $OPTARG" >&2
read_sets=$OPTARG
;;

p)
echo "use prefix=$OPTARG" >&2
prefix_index=$OPTARG
prefix_results=$OPTARG
;;

k)
echo "use k=$OPTARG" >&2
k=$OPTARG
;;

t)
echo "use t=$OPTARG" >&2
t=$OPTARG
;;

c)
echo "use c=$OPTARG" >&2
c=$OPTARG
;;

d)
echo "use d=$OPTARG" >&2
d=$OPTARG
;;

g)
echo "use g=$OPTARG" >&2
g=$OPTARG
;;

f)
echo "use f=$OPTARG" >&2
f=$OPTARG
;;

x)
echo "use x=$OPTARG" >&2
x=$OPTARG
;;

y)
echo "use y=$OPTARG" >&2
y=$OPTARG
;;

\?)
echo "Invalid option: -$OPTARG" >&2
exit 1
;;

:)
echo "Option -$OPTARG requires an argument." >&2
exit 1
;;
esac
done
#######################################################################
#################### END GET OPTIONS            #######################
#######################################################################

if [ -z "$starters_file" ] && [ -z "$read_sets" ];
then
help
exit
fi

if [ $t -eq 0 ]
then
echo "-t is mandatory, please fix t in value 1, 2, 3, or 4"
exit
fi

######### CHECK THE k PARITY ##########
rest=$(( $k % 2 ))
if [ $rest -eq 0 ]
then
echo "k=$k is even number, to avoid palindromes we set it to $(($k-1))"
k=$(($k-1))
fi
#######################################

#######################################################################
#################### OPTIONS SUMMARY            #######################
#######################################################################
MY_PATH="`( cd \"$MY_PATH\" && pwd )`"  # absolutized and normalized
if [ -z "$MY_PATH" ] ; then
# error; for some reason, the path is not accessible
# to the script (e.g. permissions re-evaled after suid)
exit 1  # fail
fi
echo -e "\tRunning pipeline Mapsembler2_extremities, Mapsembler2_extend and Kissread_g, in directory "$MY_PATH" with following parameters:"
echo -e "\t\t fasta file containing starters="$starters_file
echo -e "\t\t fasta files containing reads="$read_sets
echo -e "\t\t fasta files generating and containing starter extremities ="$extremities_filename
echo -e "\t\t prefix for index files="$prefix_index
echo -e "\t\t prefix for results files="$prefix_results


echo -e "\t\t k="$k
echo -e "\t\t t="$t
echo -e "\t\t c="$c
echo -e "\t\t d="$d
echo -e "\t\t g="$g
echo -e "\t\t f="$f
echo -e "\t\t x="$x
echo -e "\t\t y="$y
echo -e -n "\t\t starting date="
date
echo
#######################################################################
#################### END OPTIONS SUMMARY        #######################
#######################################################################


#######################################################################
#################### mapsembler_extremeities     #######################
#######################################################################
echo -e "\n"
echo -e "\033[31m running $PATH_TOOLS\mapsembler2_extremities --k $k --min-solid-subkmer $c --starters $starters_file --reads \"$read_sets\" --output $extremities_filename \033[0m"
$PATH_TOOLS\mapsembler2_extremities --k $k --min-solid-subkmer $c --starters $starters_file --reads "$read_sets" --output $extremities_filename
if [ $? -ne 0 ]
then
echo "there was a problem with mapsembler, command line: $PATH_TOOLS""mapsembler2_extremities --k $k --starters $starters_file --reads $read_sets --output $extremities_filename"
exit
fi


#######################################################################
#################### mapsembler                 #######################
#######################################################################
if [ -f "$extremities_filename" ]
then
echo -e "\n"
echo -e "\033[31m running $PATH_TOOLS""mapsembler2_extend $extremities_filename $read_sets -t $t -k $k -c $c -g $g -x $x -y $y -f $f -i $prefix_index -o $prefix_results\033[0m"
$PATH_TOOLS\mapsembler2_extend $extremities_filename $read_sets -t $t -k $k -c $c -g $g -x $x -y $y -f $f -i $prefix_index -o $prefix_results
if [ $? -ne 0 ]
then
echo "there was a problem with mapsembler, command line: $PATH_TOOLS""mapsembler_extend $extremities_filename $read_sets -t $t -k $k -c $c -g $g -x $x -y $y -f $f -i $prefix_index -o $prefix_results"
exit
fi
fi


#######################################################################
#################### kissreads_g and kissreads  #######################
#######################################################################

	if [ $t -gt 2 ]
	then
		echo -e "\n"
		echo -e "\033[31m running $PATH_TOOLS""mapsembler2_kissreads_graph $prefix_results\_k_$k\_c_$c\_t_$t.json $read_sets -k $k -c $c -t m -M -o $prefix_results\_k_$k\_c_$c\_t_$t\_modified.json\033[0m"
$PATH_TOOLS\mapsembler2_kissreads_graph $prefix_results\_k_$k\_c_$c\_t_$t.json $read_sets -k $k -c $c -t m -M -o $prefix_results\_k_$k\_c_$c\_t_$t\_modified.json
		if [ $? -ne 0 ]
		then
			echo "there was a problem with mapsembler2_kissreads_graph"
			exit
		fi

		echo -e "\n"
		echo -e "\033[31m $PATH_TOOLS""mapsembler2_kissreads_graph $prefix_results\_k_$k\_c_$c\_t_$t\_modified.json $read_sets -k $k -c $c  -t c -M -o $prefix_results\_k_$k\_c_$c\_t_$t\_modified_and_covered.json\033[0m"
$PATH_TOOLS\mapsembler2_kissreads_graph $prefix_results\_k_$k\_c_$c\_t_$t\_modified.json $read_sets -k $k -c $c -t c -M -o $prefix_results\_k_$k\_c_$c\_t_$t\_modified_and_covered.json
		if [ $? -ne 0 ]
		then
			echo "there was a problem with mapsembler2_kissreads_graph"
			exit
		fi

		res_file=$prefix_results\_k_$k\_c_$c\_t_$t\_modified_and_covered.json

	else
		echo -e "\n"
		echo -e "\033[31m running $PATH_TOOLS""mapsembler2_kissreads $prefix_results\_k_$k\_c_$c\_t_$t.fasta $read_sets -f -o $prefix_results\_coherent_k_$k\_c_$c\_t_$t.fasta -u $prefix_results\_uncoherent_k_$k\_c_$c\_t_$t.fasta\033[0m"
$PATH_TOOLS\mapsembler2_kissreads $prefix_results\_k_$k\_c_$c\_t_$t.fasta $read_sets -k $k -c $c -d $d -f -o $prefix_results\_coherent_k_$k\_c_$c\_t_$t.fasta -u $prefix_results\_uncoherent_k_$k\_c_$c\_t_$t.fasta
		if [ $? -ne 0 ]
		then
			echo "there was a problem with mapsembler2_kissreads"
			exit
		fi

		res_file=$prefix_results\_coherent_k_$k\_c_$c\_t_$t.fasta
	fi

		
#/Users/grizk/gassst  -d test_k_31_c_4_coherent.fa  -i /Users/grizk/kissnp2/kissnp2_tool/snp_list_readsnp  -p 100 -w 15 -m 8 -l 0 -r 1 -o temp;

echo "mapsembler done, results are in $res_file"
echo -e -n "\t ending date="
date
echo -e "\t Thanks for using mapsembler - http://http://colibread.inria.fr/mapsembler/"



