Lipidome Juxtaposition

Visit GitHub Project

use warnings;
use strict;
use List::Util qw(min max); # For Finding Max and Min in a list
use Data::Dumper qw(Dumper);

my $MethodChoice = 4;
# Saving Input Folder Name
my $InputFolderName = $ARGV[0];

# Giving path to LipidMaps Structure Drawing Tools
`export PATH="\$PATH:./bin/*_lipidmapstools/bin"`; # In bash/Linux

# Declaring Variables
my $VarString1;
my $VarString2;
my @VarList1;
my @VarList2;
my ($c1,$c2,$c3,$c4,$d1,$d2,$d3,$d4,$h1,$h2,$h_position,$db_positions1,$db_positions2,$db_positions3,$db_positions4,$i, $c_atoms, $d_bonds);

my @ClassList=(); # Initializing an array to store lipid classes
my @CentroidList=();
my @FAVarList2=(); # Probably not required. Included for Debugging
my $LipidName;
my $LipidNameCopy;
my $centroid;
my $SMILESlistFileName;
my $CentroidListFileName;
my @LipidNamesList=();
my $LipidClass;
my $LipidMapsProgramToUse;
my @Chains; # A Temporary list to store fatty acid chains
my @UnsupportedSpecies; # Array to store unused lipid abbreviations 
my $TempFileName;
my $HighestConc; # Variable to store highest concentration observed in given set of files. Useful for plotting.
my @Lipid; # A list to store all lipid species
my @TempLipidsVarList1;
my @TempLipidsVarList2;
my @TempFileNameList;


my $LysoFlag=0; # If 0, its NOT a lyso species. If 1, its a lyso species.

# Obtaining Names of files present in InputFolder
my @FileNameList = `ls ./$InputFolderName`;

if (scalar(@FileNameList) == 0){ die "\nSomething is wrong, didnt find any files in $InputFolderName folder. Program terminating\n\n";}

# Removing known FileNames ("UserSMILES" and "FA" and "Cer") from @FileNameList
my @TempArray = (); my $file;
foreach $file (@FileNameList) { 
	chomp($file);
	push (@TempArray, $file) if $file !~ m/(UserSMILES|FA|Cer)\.t(xt|ab)/;
}
@FileNameList = ();@FileNameList = @TempArray; 

# Checking for duplicate FileNames in InputFolder
my @UniqFileNameList = uniq(@FileNameList); 
if (scalar(@UniqFileNameList) != scalar(@FileNameList)) { 
	die "\nThere are ",  scalar(@FileNameList) - scalar(@UniqFileNameList), " duplicate file names in your input folder.... program is terminating\n\n";
}

# Saving number of Input files
my $NumOfInputFiles = scalar(@FileNameList);

# Reading UserSMILES file
my %UserSMILES; # Hash table to store UserSMILES
my $count = 0 ;
open UserSMILES_FILE, "./$InputFolderName/UserSMILES.tab";
while (<UserSMILES_FILE>){ $count++;
	$_ =~ s/\R//g; # Removing carraige return and/or new line character
	@_ = split (/\t/,$_); if ( defined($_[0]) or defined($_[1]) ) {} else { die "Unable to parse UserSMILES file at line $count";}
	chomp($_[1]);
	$UserSMILES{$_[1]}="$_[0]";
}
close UserSMILES_FILE; 

# Loading Fatty Acids from File
my @FAlist; # A list containing Fatty Acids
open FA_FILE, "./$InputFolderName/FA.tab" or die "Can\'t open \.\/$InputFolderName\/FA.tab \: $!";
while (<FA_FILE>){
	$_ =~ s/\R//g; # Removing carraige return and/or new line character
	chomp($_);
	@_ = split("\t",$_);
	push (@FAlist, [@_]); 
}
close FA_FILE;

# Loading Sphingosine(s) from File
my @CerList; # A list containing Sphingosoine composition
open CER_FILE, "./$InputFolderName/Cer.txt" or die "Can\'t open \.\/$InputFolderName\/Cer.txt \: $!";
while (<CER_FILE>){
	chomp($_);
	$_ =~ s/\R//g;
	push (@CerList, $_); 
}
close CER_FILE; 

#################################################
#1----------Reading Lipidome Data----------------
#################################################
my @SampleList;
my %DataTable = ();

if ((scalar (@FileNameList)) > 1) {# If multiple input files, read each file in InputFolder and make a combined list of lipid species

	$HighestConc=0; # Variable to store highest concentration observed in given set of files. Useful for plotting.
	foreach $TempFileName (@FileNameList){
		@TempLipidsVarList1=();
		@TempLipidsVarList2=();
		open InFile, "./$InputFolderName/$TempFileName" or die "Can't open file $TempFileName in folder $InputFolderName : $!";
		while (<InFile>){
			$_ =~ s/\R//g; # Removing carraige return and/or new line character
			@_ = split(/\t/,$_); # Separating LipidSpecies Names from their Concentrations
			push @TempLipidsVarList1, $_[0];
			# $_[0] =~ s/(\s|;|:|,)/_/g;
			$DataTable{"$_[0]"}{"$TempFileName"} = $_[1]; # Hash of hash. Autovivification. $DataTable{Molecule Name}{Sample} = Molecule Concentration
			chomp($_[1]);
			if ($HighestConc < $_[1]) { $HighestConc = $_[1];}
		}
		close InFile;

		# Checking if there are any duplicate entries in a given file.
		@TempLipidsVarList2 = uniq(@TempLipidsVarList1);
		if (scalar(@TempLipidsVarList2) != scalar(@TempLipidsVarList1)) { 
			die "\nThere are ",  scalar(@TempLipidsVarList1) - scalar(@TempLipidsVarList2), " duplicate molecule names in file $TempFileName. Program terminating\n\n";
		}
		
		push @Lipid,@TempLipidsVarList1;
	}
	
	
	# Combining lipid species from all files into one single Duplicate-Free list.
	@TempLipidsVarList2 = ();
	@TempLipidsVarList2 = uniq(@Lipid);
	@Lipid = ();
	@Lipid = sort @TempLipidsVarList2; print "No. of unique lipid species = ", scalar(@Lipid), "\n"; 
	my $fh; open $fh, "> ./$InputFolderName/LipidsList.txt"; print $fh join ("\n", @Lipid); close $fh;

	# Writing combined data to a single file
	open $fh, "> ./$InputFolderName/$InputFolderName.tab";
	print $fh join ("\t", @FileNameList);
	my ($MolName, $FileName);
	foreach $MolName (@Lipid){
		print $fh ("\n", $MolName, "\t");
		@TempLipidsVarList2 = ();
		foreach $FileName (@FileNameList) {
			if (defined($DataTable{$MolName}{$FileName})) { push @TempLipidsVarList2, $DataTable{$MolName}{$FileName}; }
			else { push @TempLipidsVarList2, "NA"; }
		}
		print $fh join ("\t", @TempLipidsVarList2);
	}
	close $fh;
	# print "Check files"; <stdin>;
}

elsif ((scalar (@FileNameList)) == 1) { # If only one tab/space/comma delimited input file, read data file and make a list of lipid species, sample names

	$TempFileName = $FileNameList[0];
	$HighestConc=0; # Variable to store highest concentration observed in given set of files. Useful for plotting.

	open my $fh, "./$InputFolderName/$TempFileName" or die "Can't open file $TempFileName in folder $InputFolderName : $!";
	my @lines = map [ split(/[\t,\,]+/) ], <$fh>;
	close $fh;
	
	my $NumSamples = scalar @{$lines[0]}; # print "$NumSamples"; <stdin>;
	my $NumLipids = ((scalar @lines)-1); # print "$NumLipids"; <stdin>;
	for (my $i = 1; $i <= $NumLipids; $i++){
		push @Lipid, @{$lines[$i]}[0];
	} # print $Lipid[0], "\t", $Lipid[-1];<stdin>;
	@{$lines[0]}[-1] =~ s/\R//g;
	@SampleList = @{$lines[0]}; # print $FileNameList[0],"\t", $FileNameList[-1]; <stdin>;

	# Checking if there are any duplicate lipid entries in a given file.
	@TempLipidsVarList1 = uniq(@Lipid);
		if (scalar(@TempLipidsVarList1) != scalar(@Lipid)) { 
			die "\nThere are ", scalar(@TempLipidsVarList1) - scalar(@Lipid), "\nDuplicate molecule names in file $TempFileName. Program terminating\n\n";
		}
	@Lipid = @TempLipidsVarList1;
	
	# Checking if there are any duplicate sample entries in a given file.
	@TempLipidsVarList1 = uniq(@SampleList);
		if (scalar(@TempLipidsVarList1) != scalar(@SampleList)) { 
			die "\nThere are ",  scalar(@TempLipidsVarList1) - scalar(@SampleList), "\nDuplicate sample/experiment names in file $TempFileName. Program terminating\n\n";
		}
	@SampleList = @TempLipidsVarList1;
	
	# Obtaining highest concentration
	my $j;
	for ($i=1; $i<=$NumLipids; $i++) { 
		for ($j=1; $j<=$NumSamples; $j++) {
			if (@{$lines[$i]}[$j] !~ m/NA/){
				if (@{$lines[$i]}[$j] > $HighestConc){$HighestConc = @{$lines[$i]}[$j]; } 
			}
		}
	}
#	print "\n\n\$HighestConc = $HighestConc"; <stdin>;

	# Writing Lipidome data from each sample to a separate file
	$j = 1; my $write2file; my $Filename;
	foreach $Filename (@SampleList){
		open $write2file, "> ./$InputFolderName/$Filename";
		for ($i = 1; $i <= $NumLipids; $i++){
			if (@{$lines[$i]}[$j] !~ /NA/){
				@{$lines[$i]}[$j] =~ s/\R//g;
				print $write2file @{$lines[$i]}[0], "\t", @{$lines[$i]}[$j];
				print $write2file "\n" if ($i < $NumLipids);
			}
		} 
		close $write2file;
		$j++;
	} # print "\nCheck Files.."; <stdin>;
	
}

print "Highest conc = $HighestConc\n";

#################################################
#2---------Generate Lipid Structures-------------
#################################################
my ($PlasmogenFlag, $SN_Flag, $OH_Flag );
my ($dbp1, $dbp2);

open OutFile2, "> ./$InputFolderName/Isomers.txt" or die "Unable to open ./$InputFolderName/Isomers.txt file"; 

foreach (@Lipid){
exit_foreach_lipid_loop: {

	$LipidName=$_;
	chomp($LipidName);
	print "Generating possible structures for $LipidName\n";

	# Opening a file to write isomer possibilities for centroid determination
	open OutFile1, "> ./$InputFolderName/OutFile1.txt";

	$LipidClass = &Get_LipidClass ; 
	# print "Lipid Class = $LipidClass\n";# <stdin>;
	
	$PlasmogenFlag = &CheckPlasmogen;
	$SN_Flag = &Check_SN_composition;
	$OH_Flag = &Get_OH_info_for_Ceramides; # Get information on hydroxylation state for Ceramides. Applicable only if SN1 and SN2 FA composition is not specified.

	my $user_SMILES_found = &Search_UserSMILES;

	if ($user_SMILES_found == 1){
		close OutFile1;
		last exit_foreach_lipid_loop;
	}

	elsif ($LipidClass !~ m/\b(PC|PA|PS|PI|PE|PG|DG|TG|TAG|DAG|CL|IPC|MIPC|MIP2C|Cer|CerP|PICer|HexCer|PECer|CerPE|LCB|LCBP|MIPCer|MIP2Cer|LPC|LPA|LPS|LPI|LPE|LPG|LIPC)\b/){
		push @UnsupportedSpecies, $LipidName;
		last exit_foreach_lipid_loop;
		# print "\nCannot generate structure for this molecule..Press any key to coninue"; <stdin>;
	}

	push (@ClassList, $LipidClass);

	$LysoFlag = &CheckForLyso; # This sub routine also changes the string $LipidClass
	

	# print "\nLipid Class = $LipidClass\n\$PlasmogenFlag = $PlasmogenFlag\n\$SN_Flag = $SN_Flag\n\$LysoFlag = $LysoFlag\n\$OH_Flag = $OH_Flag\n";<stdin>;
	# print "\nPlasmogenFlag = $PlasmogenFlag\n";

	if (($SN_Flag == 1) or ($LysoFlag == 1)) {
		&Parse_and_draw_Isomers;
	}

	else {
		# Predict SN composition and draw PhosphoLipids and DG
		if ($LipidClass =~ m/\b(PC|PA|PS|PI|PE|PG|DG)\b/){ # print "\nPredicting SN composition and drawing PC_DG...\n";
			($c1, $c2, $d1, $d2, $dbp1, $dbp2) = &Predict_SN_and_draw_PL_DG($LipidName, $LipidClass);
		}
		
		# Predict SN composition and draw Ceramides
		elsif ($LipidClass =~ m/\b(Cer|CerP|PICer|HexCer|PECer|MIPCer|MIP2Cer)\b/){ # print "\nPredicting SN composition and drawing Ceramides...\n";
			($c1, $c2, $d1, $d2, $dbp1, $dbp2) = &Predict_SN_and_draw_Cer($LipidName, $LipidClass);
		}

		# Draw TG
		elsif ($LipidClass =~ m/\bTG\b/){ # print "\nPredicting SN composition and drawing TG...\n";
			($c1, $c2, $d1, $d2, $dbp1, $dbp2) = &Predict_SN_and_draw_TG($LipidName, $LipidClass);
		}
		
		# Draw CL
		elsif ($LipidClass =~ m/\bCL\b/){ # print "\nPredicting SN composition and drawing CL...\n";
			($c1, $c2, $d1, $d2, $dbp1, $dbp2) = &Predict_SN_and_draw_CL($LipidName, $LipidClass);
		}
	}

	# Program design generates duplicates for Ceramides, PC/PE TAG etc. (TODO : Modify design so that duplicates are not generated)
	# Deleting duplicates...
	close OutFile1; # print "\nCheck OutFile1 and press any key to continue..."; <stdin>;
	&DelDuplicates;
	&FindCentroidIsomer;
	
}
}
&WriteLists2Files;
close OutFile2;
	
################################################################################
#3------------Calculating pair wise distances between lipid species-------------
################################################################################

my $CSVfilename = "$InputFolderName"."\.csv"; # print "./$InputFolderName/$CSVfilename";<stdin>;
my $LOGfilename = "$InputFolderName"."\.log";
my $ALNfilename = "$InputFolderName"."\.aln";
	
if    ($MethodChoice==1){`perl		./bin/*m1.pl	./$InputFolderName/$SMILESlistFileName				./$InputFolderName/$CSVfilename	./$InputFolderName/$LOGfilename`;}
elsif ($MethodChoice==2){`python	./bin/*m2.py	./$InputFolderName/$SMILESlistFileName				./$InputFolderName/$CSVfilename	./$InputFolderName/$LOGfilename`;}
#elsif ($MethodChoice==3){`perl	./bin/*m3.pl	./$InputFolderName/$SMILESlistFileName				./$InputFolderName/$CSVfilename	./$InputFolderName/$LOGfilename`;}
elsif ($MethodChoice==4){`python	./bin/*m4.py	./$InputFolderName/$SMILESlistFileName				./$InputFolderName/$CSVfilename	./$InputFolderName/$LOGfilename`;}
elsif ($MethodChoice==5){`python	./bin/*m5.py	./$InputFolderName/$SMILESlistFileName	./bin/mat4	./$InputFolderName/$CSVfilename	./$InputFolderName/$LOGfilename`;}
elsif ($MethodChoice==6){`perl		./bin/*m6.pl	./$InputFolderName/$SMILESlistFileName	./$InputFolderName/O2.aln	./$InputFolderName/$CSVfilename	./$InputFolderName/$LOGfilename`;
					`rm ./$InputFolderName/O2.aln ./$InputFolderName/O2.aln_joined`;`mv ./$InputFolderName/O2.aln_joined_rearranged ./$InputFolderName/$ALNfilename`;
}

################################################################################
#4--------------------Performing Principal Component Analysis ------------------
################################################################################

my $PCAfilename = "$InputFolderName"."\.pca";
my $VariancePlotFileName = "$InputFolderName"."_VariancePlot"."\.pdf";
my @PCAcoordinates = `Rscript ./bin/121012_PCAfromCSV.R ./$InputFolderName/$CSVfilename ./$InputFolderName/$VariancePlotFileName ./$InputFolderName/$PCAfilename`;

chomp($PCAcoordinates[0]); #print "$PCAcoordinates[0] \n";
chomp($PCAcoordinates[1]); #print "$PCAcoordinates[1] \n";
chomp($PCAcoordinates[2]); #print "$PCAcoordinates[2] \n";
chomp($PCAcoordinates[3]); #print "$PCAcoordinates[3] \n $HighestConc \n"; <stdin>;

# print "\n\nCompleted PCA.. press any key to continue"; <stdin>;
################################################
#5---------Plot 2D and 3D Maps------------------
################################################

my @MultiPlotOutput=();

if (scalar (@FileNameList) == 1) { @FileNameList = (); @FileNameList = @SampleList; }

foreach $TempFileName (@FileNameList){
	@MultiPlotOutput = `Rscript ./bin/130115_PCA_plot_150507Edit2.R "./$InputFolderName/$TempFileName" "./$InputFolderName/$InputFolderName\.pca" "./$InputFolderName/ClassList.txt" "./$InputFolderName/CentroidList.txt" "$PCAcoordinates[0]" "$PCAcoordinates[1]" "$PCAcoordinates[2]" "$PCAcoordinates[3]" "$HighestConc" "./$InputFolderName/$TempFileName"`;
	# print @MultiPlotOutput;
}

# Combined Lipidome Plot
my $SVGfilename_2D = "$InputFolderName"."_2D\.svg";
my $SVGfilename_3D = "$InputFolderName"."_3D\.svg";
`Rscript ./bin/130124_CombinedLipidomePlot_150507Edit2.R "./$InputFolderName/$SVGfilename_2D" "./$InputFolderName/$PCAfilename" "./$InputFolderName/ClassList.txt" "./$InputFolderName/$SVGfilename_3D"`;

# Writing un-analyzed lipid species to <stdout>
print "\nUnused lipid abbreviations\n\n";
print join ("\n", @UnsupportedSpecies);
print "\n\n";

open my $fh, "> ./$InputFolderName/Unused_LipidAbbreviations.txt";
print $fh join ("\n", @UnsupportedSpecies);
close $fh;

################################################################################################
#6----------------Calculating Hausdroff distance and ploting Historgrams------------------------
################################################################################################

my $FileNameListRef = \@FileNameList;
my $FileNameList = join("XXXX", @FileNameList); # print "$FileNameList\n";

# Calculating Hausdroff distance and Average distance from PC1 PC2 Coordinates
 `perl ./bin/130117_3_MaxDist_and_AvgDist_from_PC1_PC2.pl $InputFolderName $PCAfilename $FileNameList`; 

# Calculating Hausdroff distance and Average distance from PC1 PC2 PC3 Coordinates
# $FileNameList = join("XXXX", @FileNameList);
 `perl ./bin/130123_2_MaxDist_and_AvgDist_from_PC1_PC2_PC3.pl $PCAfilename $FileNameList $InputFolderName`;

# Calculating Hausdroff distance and Average distance from Similarity Scores
# $FileNameList = join("XXXX", @FileNameList);
 `perl ./bin/140805_2_MaxDist_and_AvgDist_from_SimilarityScore.pl $CSVfilename $FileNameList $InputFolderName`;

# Creating Dendrograms as pdf - one per page
 `Rscript ./bin/130117_Hdist2Dendro_150629_Edit.R $InputFolderName "./$InputFolderName/LUX_PC1_PC2.csv" "./$InputFolderName/LUX_PC1_PC2_PC3.csv" "./$InputFolderName/LUX_SimilarityScore.csv"`;

`rm ./$InputFolderName/CalcStats`;
###########################################
#----------- Sub-routine's ----------------
###########################################

sub uniq {

	my @InArray = @_;
	my @unique = ();
	my %seen   = ();
	foreach my $elem ( @InArray ) {
		next if $seen{ "$elem" }++;
		push @unique, $elem;
	}

	return (@unique);
	# return keys %{{ map { $_ => 1 } @_ }};  
}   

sub ModifySMILES {

	$_ = $_[0];

	$_ =~ s/\[C@\@H\]/C/g;			# Removing steriochemistry for chiral carbon
	$_ =~ s/\[C\@H\]/C/g;
	$_ =~ s/\[C\@\@]/C/g;
	$_ =~ s/\[C\@]/C/g;

	$_ =~ s/\\//g;					# Forward and backward slashes (meaning - cis - trans) are removed
	$_ =~ s/\///g;

	$_ =~ s/\[O\-\]/O/g;			# Removing charges for Oxygen and Nitrogen atoms
	$_ =~ s/\[N\+\]/N/g;	

	return ($_);
}

sub Search_UserSMILES {
	my ($key,$LipidNameCopy);
	my $count = 0;
	$LipidNameCopy = $LipidName;
	foreach $key (keys (%UserSMILES)) { 
		if ($key =~ m/\b\Q$LipidName\E\b/){ $count++;
			# Add2Lists
			$LipidNameCopy =~ s/(\s|;|:|,)/_/g;
			push (@CentroidList, "$LipidNameCopy\tUserDefinedSMILES\t0"); 
			push (@LipidNamesList, "$UserSMILES{$LipidName} $LipidNameCopy");
			print "User defined SMILES taken for this molecule\n";
		}
	}
	
	if ($count > 1){
		print "\nFound duplicate molecule name in user defined SMILES list\n" and die "Program Terminating";
	}
	
	return ($count);
}

sub Get_LipidClass {
	if ($LipidName =~ m/\s/){
		@_ = split /(\s|_)/ , "$LipidName";
		$LipidClass = $_[0]; #print "\$LipidClass at line 438  = $LipidClass";<stdin>;
	}
}


sub CheckPlasmogen {
	$PlasmogenFlag = 0;
	if ($LipidName =~ m/_O/){ # print "Encountered O-";<stdin>;
		$PlasmogenFlag = 1;
		# $LipidName =~ s/\sO-/\_O/; # print $LipidClassDup;<stdin>;
	}
	if ($LipidName =~ m/_P/){ 
		$PlasmogenFlag=2;
		# $LipidName =~ s/\sP-/\_P/;
	}
	return ($PlasmogenFlag);
}

sub Check_SN_composition {

	@_ = split /\s/ , "$LipidName";
	my @SN_Chains = split /\// , $_[1] if (defined($_[1]));

	if (defined($SN_Chains[1])){
		$SN_Flag = 1;
	}
	else {
		$SN_Flag = 0;
	}
	
	return ($SN_Flag);
}

sub CheckForLyso { 	# Finding out of a lipid species is a lyso molecule or not.

	$LysoFlag = 0;
	if ($LipidClass =~ m/^TAG/) {$LipidClass =~ s/TAG/TG/;}
	elsif ($LipidClass =~ m/^DAG/) {$LipidClass =~ s/DAG/DG/;}
	elsif ($LipidClass =~ m/^IPC/) {$LipidClass =~ s/IPC/PICer/;}
	elsif ($LipidClass =~ m/^CerPE/) {$LipidClass =~ s/CerPE/PECer/;}
	elsif ($LipidClass =~ m/^MIPC/) {$LipidClass =~ s/^MIPC/MIPCer/;}
	elsif ($LipidClass =~ m/^M\(IP\)2C/) {$LipidClass =~ s/^M\(IP\)2C/MIP2Cer/;}
	elsif ($LipidClass =~ m/^MIP2C/) {$LipidClass =~ s/^MIP2C/MIP2Cer/;}
	elsif ($LipidClass =~ m/^LCB$/) {$LipidClass =~ s/LCB/Cer/; $LysoFlag=1;} # Its a lyso molecule
	elsif ($LipidClass =~ m/^LCBP$/) {$LipidClass =~ s/LCBP/CerP/; $LysoFlag=1;} # Its a lyso molecule
	elsif ($LipidClass =~ m/^LPC/) {$LipidClass =~ s/LPC/PC/; $LysoFlag=1;} # Its a lyso molecule
	elsif ($LipidClass =~ m/^LPA/) {$LipidClass =~ s/LPA/PA/; $LysoFlag=1;} # Its a lyso molecule
	elsif ($LipidClass =~ m/^LPS/) {$LipidClass =~ s/LPS/PS/; $LysoFlag=1;} # Its a lyso molecule
	elsif ($LipidClass =~ m/^LPI/) {$LipidClass =~ s/LPI/PI/; $LysoFlag=1;} # Its a lyso molecule
	elsif ($LipidClass =~ m/^LPE/) {$LipidClass =~ s/LPE/PE/; $LysoFlag=1;} # Its a lyso molecule
	elsif ($LipidClass =~ m/^LIPC/) {$LipidClass =~ s/LIPC/PICer/; $LysoFlag=1;} # Its a lyso molecule
	
	return ($LysoFlag);
}

sub Get_OH_info_for_Ceramides{
	$OH_Flag = 0;
	if ($LipidName =~ m/C_dt/) { 
		$OH_Flag = 3;
		# $LipidClass =~ s/C_dt/C/;print "\n\$LipidClass = $LipidClass\n";
	}
	elsif ($LipidName =~ m/C_d/) {
		$OH_Flag = 1;
		# $LipidClass =~ s/C_d/C/;print "\n\$LipidClass = $LipidClass\n";
	}
	elsif ($LipidName =~ m/C_t/) {
		$OH_Flag = 2;
		# $LipidClass =~ s/C_t/C/;print "\n\$LipidClass = $LipidClass\n";
	}
	
	return ($OH_Flag);
}

sub Parse_and_draw_Isomers {
	
	chomp($LipidName);
	my $LipidNameCopy = $LipidName; # making a copy of lipid name to use it later (while generating CentroidList and SMILESlist files )

	# Breaking lipid name to extract fatty acid chain information
	my @LipidNameSplit;
	if ($LipidName =~ m/\s/){ # Any lipid except sterols
		@LipidNameSplit = split /\s/ , "$LipidName";
		@Chains = split /\// , $LipidNameSplit[1];
	}
	if (($LipidClass =~ m/Cer/) and defined($LipidNameSplit[2])){ # print "\n \$LipidNameSplit\[2\] = $LipidNameSplit[2]\n"; 
		if ($LipidNameSplit[2] == 1){$h2 = 1;}
		elsif ($LipidNameSplit[2] == 2){$h2 = 2;}
		else {print "\nUnknown text found in Lipid name\n" and die "Program Terminating !!";}
	}

	# Identifying number of c'atoms and d'bonds for (L)P(C|A|S|I|E|G) and DaG and Cer(P) CerPE HexCer
	if ($LipidClass =~ m/\b(PC|PA|PS|PI|PE|PG|DG|Cer|CerP|PICer|HexCer|PECer|MIPCer|MIP2Cer)\b/){

		# Splitting string "chain_1" to obtain 1) No. of carbon atoms 2) No. of double bonds
		@_ = split("\:|\;", $Chains[0]);
		($c1,$d1) = ($_[0], $_[1]); # print "\nc1 = $c1 and d1 = $d1\n";<stdin>;
		if (defined($_[2]) and $LipidClass =~ m/Cer/) { 
			if ($_[2] == 1){$h1 = 0;}
			elsif ($_[2] == 2){$h1 = 1;}
			elsif ($_[2] == 3){$h1 = 2;}
			else {print "\nOH position in LongChainBase is not parsed for molecule $LipidName\n" and die "Program Terminating\n";}
			# print "\$_\[2\] = $_[2]   \$h1 = $h1\n";
		} # Applicable to Cermaides
		
		if ($LysoFlag == 1) {
			$c2 = 0;
			$d2 = 0;
			if ($LipidClass =~ m/Cer/){ $h2 = 0;}
		}
		
		# Splitting string "chain_2" to obtain 1) No. of carbon atoms 2) No. of double bonds
		elsif ($LysoFlag == 0) {
			@_ = split("\:|\;", $Chains[1]);
			($c2,$d2) = ($_[0], $_[1]);
			if (($LipidClass =~ m/Cer/) and defined($_[2])){ $h2 = $_[2];}
		}
		
		foreach (@FAlist){
			if (($_->[0] == $c1) and ($_->[1] == $d1)) {
				if($d1>0){ 
					if (defined($_->[2])){ 
						$db_positions1= $_->[2];
					}
					else {
						$db_positions1="";
					}
				}else{
					$db_positions1="";
				}
				foreach (@FAlist){
					if (($_->[0] == $c2) and ($_->[1] == $d2)) {
						if($d2>0){
							if (defined($_->[2])){
								$db_positions2= $_->[2];
							}
							else {
								$db_positions2="";
							}
						}else{
							$db_positions2="";
						}
						if ($LipidClass =~ m/DG/){
							print OutFile1 "$LipidClass\($c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\/0\:0\)\n";
							print OutFile1 "$LipidClass\($c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\/0\:0\)\n"; # Xchanging SN1 and SN2 positions
						}elsif ($LipidClass =~ m/Cer/){
							# print "c1 = $c1, d1 = $d1,db_positions1 = $db_positions1, c2 = $c2,d2 = $d2, db_positions2 = $db_positions2";
							&GoToCer;
						}else{
							print OutFile1 "$LipidClass\($c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\)\n";
							# If its not a Lyso molecule, then exchange SN1 and SN2 positions
							if ($LysoFlag != 1){ 
								# Xchanging SN1 and SN2 positions
								print OutFile1 "$LipidClass\($c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\)\n"; 
							}
						}
					}
				}
			}
		}
	}
	
	# For TaG's only
	elsif ($LipidClass =~ m/TG/){
		
		# Splitting string "chain_1" to obtain 1) No. of carbon atoms 2) No. of double bonds
		@_ = split("\:|\;", $Chains[0]);	
		($c1,$d1) = ($_[0], $_[1]);
		
		# Splitting string "chain_2" to obtain 1) No. of carbon atoms 2) No. of double bonds
		@_ = split("\:|\;", $Chains[1]);
		($c2,$d2) = ($_[0], $_[1]);
		
		# Splitting string "chain_2" to obtain 1) No. of carbon atoms 2) No. of double bonds
		@_ = split("\:|\;", $Chains[2]);
		($c3,$d3) = ($_[0], $_[1]);

		foreach (@FAlist){
			if (($_->[0] == $c1) and ($_->[1] == $d1)) {
				if($d1>0){$db_positions1= $_->[2];}else{$db_positions1="";}
				foreach (@FAlist){
					if (($_->[0] == $c2) and ($_->[1] == $d2)) {
						if($d2>0){$db_positions2= $_->[2];}else{$db_positions2="";}
						foreach (@FAlist){
							if (($_->[0] == $c3) and ($_->[1] == $d3)) {
							if($d3>0){$db_positions3= $_->[2];}else{$db_positions3="";}
								print OutFile1 "$LipidClass\($c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\/$c3\:$d3$db_positions3\)\n";
								print OutFile1 "$LipidClass\($c1\:$d1$db_positions1\/$c3\:$d3$db_positions3\/$c2\:$d2$db_positions2\)\n";
								print OutFile1 "$LipidClass\($c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\/$c3\:$d3$db_positions3\)\n";
								print OutFile1 "$LipidClass\($c2\:$d2$db_positions2\/$c3\:$d3$db_positions3\/$c1\:$d1$db_positions1\)\n";
								print OutFile1 "$LipidClass\($c3\:$d3$db_positions3\/$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\)\n";
								print OutFile1 "$LipidClass\($c3\:$d3$db_positions3\/$c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\)\n";
							}
						}
					}
				}
			}
		}
	}									

	# For CL's only
	elsif ($LipidClass =~ m/CL/){

		# Splitting string "chain_1" to obtain 1) No. of carbon atoms 2) No. of double bonds
		@_ = split("\:|\;", $Chains[0]);
		($c1,$d1) = ($_[0], $_[1]);
		
		# Splitting string "chain_2" to obtain 1) No. of carbon atoms 2) No. of double bonds
		@_ = split("\:|\;", $Chains[1]);
		($c2,$d2) = ($_[0], $_[1]);
		
		# Splitting string "chain_2" to obtain 1) No. of carbon atoms 2) No. of double bonds
		@_ = split("\:|\;", $Chains[2]);
		($c3,$d3) = ($_[0], $_[1]);

		# Splitting string "chain_2" to obtain 1) No. of carbon atoms 2) No. of double bonds
		@_ = split("\:|\;", $Chains[3]);
		($c4,$d4) = ($_[0], $_[1]);

		foreach (@FAlist){
			if (($_->[0] == $c1) and ($_->[1] == $d1)) {
				if($d1>0){$db_positions1= $_->[2];}else{$db_positions1="";}
				foreach (@FAlist){
					if (($_->[0] == $c2) and ($_->[1] == $d2)) {
					if($d2>0){$db_positions2= $_->[2];}else{$db_positions2="";}
					foreach (@FAlist){
							if (($_->[0] == $c3) and ($_->[1] == $d3)) {
							if($d3>0){$db_positions3= $_->[2];}else{$db_positions3="";}
								foreach (@FAlist){
									if (($_->[0] == $c4) and ($_->[1] == $d4)) {
									if($d4>0){$db_positions4= $_->[2];}else{$db_positions4="";}

										print OutFile1 "$LipidClass\(1\'\-\[$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\]\, 3\'\-\[$c3\:$d3$db_positions3\/$c4\:$d4$db_positions4\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\]\, 3\'\-\[$c4\:$d4$db_positions4\/$c3\:$d3$db_positions3\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c1\:$d1$db_positions1\/$c3\:$d3$db_positions3\]\, 3\'\-\[$c2\:$d2$db_positions2\/$c4\:$d4$db_positions4\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c1\:$d1$db_positions1\/$c3\:$d3$db_positions3\]\, 3\'\-\[$c4\:$d4$db_positions4\/$c2\:$d2$db_positions2\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c1\:$d1$db_positions1\/$c4\:$d4$db_positions4\]\, 3\'\-\[$c2\:$d2$db_positions2\/$c3\:$d3$db_positions3\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c1\:$d1$db_positions1\/$c4\:$d4$db_positions4\]\, 3\'\-\[$c3\:$d3$db_positions3\/$c2\:$d2$db_positions2\]\)\n";

										print OutFile1 "$LipidClass\(1\'\-\[$c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\]\, 3\'\-\[$c3\:$d3$db_positions3\/$c4\:$d4$db_positions4\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\]\, 3\'\-\[$c4\:$d4$db_positions4\/$c3\:$d3$db_positions3\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c2\:$d2$db_positions2\/$c3\:$d3$db_positions3\]\, 3\'\-\[$c1\:$d1$db_positions1\/$c4\:$d4$db_positions4\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c2\:$d2$db_positions2\/$c3\:$d3$db_positions3\]\, 3\'\-\[$c4\:$d4$db_positions4\/$c1\:$d1$db_positions1\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c2\:$d2$db_positions2\/$c4\:$d4$db_positions4\]\, 3\'\-\[$c1\:$d1$db_positions1\/$c3\:$d3$db_positions3\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c2\:$d2$db_positions2\/$c4\:$d4$db_positions4\]\, 3\'\-\[$c3\:$d3$db_positions3\/$c1\:$d1$db_positions1\]\)\n";

										print OutFile1 "$LipidClass\(1\'\-\[$c3\:$d3$db_positions3\/$c1\:$d1$db_positions1\]\, 3\'\-\[$c2\:$d2$db_positions2\/$c4\:$d4$db_positions4\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c3\:$d3$db_positions3\/$c1\:$d1$db_positions1\]\, 3\'\-\[$c4\:$d4$db_positions4\/$c2\:$d2$db_positions2\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c3\:$d3$db_positions3\/$c2\:$d2$db_positions2\]\, 3\'\-\[$c1\:$d1$db_positions1\/$c4\:$d4$db_positions4\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c3\:$d3$db_positions3\/$c2\:$d2$db_positions2\]\, 3\'\-\[$c4\:$d4$db_positions4\/$c1\:$d1$db_positions1\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c3\:$d3$db_positions3\/$c4\:$d4$db_positions4\]\, 3\'\-\[$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c3\:$d3$db_positions3\/$c4\:$d4$db_positions4\]\, 3\'\-\[$c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\]\)\n";

										print OutFile1 "$LipidClass\(1\'\-\[$c4\:$d4$db_positions4\/$c1\:$d1$db_positions1\]\, 3\'\-\[$c2\:$d2$db_positions2\/$c3\:$d3$db_positions3\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c4\:$d4$db_positions4\/$c1\:$d1$db_positions1\]\, 3\'\-\[$c3\:$d3$db_positions3\/$c2\:$d2$db_positions2\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c4\:$d4$db_positions4\/$c2\:$d2$db_positions2\]\, 3\'\-\[$c1\:$d1$db_positions1\/$c3\:$d3$db_positions3\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c4\:$d4$db_positions4\/$c2\:$d2$db_positions2\]\, 3\'\-\[$c3\:$d3$db_positions3\/$c1\:$d1$db_positions1\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c4\:$d4$db_positions4\/$c3\:$d3$db_positions3\]\, 3\'\-\[$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\]\)\n";
										print OutFile1 "$LipidClass\(1\'\-\[$c4\:$d4$db_positions4\/$c3\:$d3$db_positions3\]\, 3\'\-\[$c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\]\)\n";
									}
								}
							}
						}
					}
				}
			}
		}
	}
}

sub GoToCer { 
	if (defined($h2)){
		if ($h2==0){ $h_position = "";}
		elsif ($h2==1){ $h_position = "(2OH)";}
		elsif ($h2==2){ $h_position = "(2OH,4OH)";}
	}	
	else {$h_position = "";} # print "\$LipidClass = $LipidClass";<stdin>;

	if ($h1==0){ print OutFile1 "$LipidClass\(m$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2$h_position\)\n";}
	elsif ($h1==1){ print OutFile1 "$LipidClass\(d$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2$h_position\)\n";}
	elsif ($h1==2){ print OutFile1 "$LipidClass\(t$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2$h_position\)\n";}
}

sub Predict_SN_and_draw_PL_DG{

	my $PlasmogenText;
	if ($PlasmogenFlag == 0){ $PlasmogenText = "";}
	elsif ($PlasmogenFlag == 1){ $PlasmogenText = "O-";}
	elsif ($PlasmogenFlag == 2){ $PlasmogenText = "P-";}

	# Breaking lipid name to extract fatty acid chain information
	if ($LipidName =~ m/\s/){
		@_ = split /\s/ , "$LipidName";
		@Chains = split /\// , $_[1];
	}

	# Splitting string "chain_1" to obtain 1) No. of carbon atoms 2) No. of double bonds
	@_ = split("\:|\;", $Chains[0]);
	($c_atoms,$d_bonds) = ($_[0], $_[1]);

	foreach (@FAlist){
		($c1, $d1, $db_positions1) = ($_->[0], $_->[1], $_->[2]); # NoC1 = No. of Carbon atoms in FA 1
		if (defined($db_positions1)){}else{$db_positions1="";}
		foreach (@FAlist){
			($c2, $d2, $db_positions2) = ($_->[0], $_->[1], $_->[2]);
			if (defined($db_positions2)){}else{$db_positions2="";}
			if ( (($c1+$c2)== $c_atoms) and (($d1+$d2) == $d_bonds) and ($c1!=0) and ($c1!=0) ) {
				if ($LipidClass =~ m/DG/){
					print OutFile1 "$LipidClass\($c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\/0\:0\)\n";
					print OutFile1 "$LipidClass\($c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\/0\:0\)\n"; # Xchanging SN1 and SN2 positions
				}else{
					print OutFile1 "$LipidClass\($PlasmogenText$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\)\n";
					# If its not a Lyso molecule, then exchange SN1 and SN2 positions
					if ($LysoFlag != 1){ 
						# Xchanging SN1 and SN2 positions
						print OutFile1 "$LipidClass\($PlasmogenText$c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\)\n"; 
					}
				}
			}
		}
	}
}

sub Predict_SN_and_draw_TG{
	# Breaking lipid name to extract fatty acid chain information
	if ($LipidName =~ m/\s/){
		@_ = split /\s/ , "$LipidName";
		@Chains = split /\// , $_[1];
	}

	# Splitting string "chain_1" to obtain 1) No. of carbon atoms 2) No. of double bonds
	@_ = split("\:|\;", $Chains[0]);
	($c_atoms,$d_bonds) = ($_[0], $_[1]);

	foreach (@FAlist){
		($c1, $d1, $db_positions1) = ($_->[0], $_->[1], $_->[2]); # NoC1 = No. of Carbon atoms in FA 1
		if (defined($db_positions1)){}else{$db_positions1="";}
		foreach (@FAlist){
			($c2, $d2, $db_positions2) = ($_->[0], $_->[1], $_->[2]); 
			if (defined($db_positions2)){}else{$db_positions2="";}
			foreach (@FAlist){
				($c3, $d3, $db_positions3) = ($_->[0], $_->[1], $_->[2]); 
				if (defined($db_positions3)){}else{$db_positions3="";}
				if ( (($c1+$c2+$c3)== $c_atoms) and (($d1+$d2+$d3) == $d_bonds) and ($c1!=0) and ($c2!=0) and ($c3!=0) ) {
					print OutFile1 "$LipidClass\($c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\/$c3\:$d3$db_positions3\)\n";
					print OutFile1 "$LipidClass\($c1\:$d1$db_positions1\/$c3\:$d3$db_positions3\/$c2\:$d2$db_positions2\)\n";
					print OutFile1 "$LipidClass\($c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\/$c3\:$d3$db_positions3\)\n";
					print OutFile1 "$LipidClass\($c2\:$d2$db_positions2\/$c3\:$d3$db_positions3\/$c1\:$d1$db_positions1\)\n";
					print OutFile1 "$LipidClass\($c3\:$d3$db_positions3\/$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\)\n";
					print OutFile1 "$LipidClass\($c3\:$d3$db_positions3\/$c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\)\n";
				}
			}
		}
	}
}	

sub Predict_SN_and_draw_CL{
	# Breaking lipid name to extract fatty acid chain information
	if ($LipidName =~ m/\s/){
		@_ = split /\s/ , "$LipidName";
		@Chains = split /\// , $_[1];
	}

	# Splitting string "chain_1" to obtain 1) No. of carbon atoms 2) No. of double bonds
	@_ = split("\:|\;", $Chains[0]);
	($c_atoms,$d_bonds) = ($_[0], $_[1]);

	foreach (@FAlist){
		($c1, $d1, $db_positions1) = ($_->[0], $_->[1], $_->[2]); # NoC1 = No. of Carbon atoms in FA 1
		if (defined($db_positions1)){}else{$db_positions1="";}
		foreach (@FAlist){
			($c2, $d2, $db_positions2) = ($_->[0], $_->[1], $_->[2]); 
			if (defined($db_positions2)){}else{$db_positions2="";}
			foreach (@FAlist){
				($c3, $d3, $db_positions3) = ($_->[0], $_->[1], $_->[2]); 
				if (defined($db_positions3)){}else{$db_positions3="";}
				foreach (@FAlist){
					($c4, $d4, $db_positions4) = ($_->[0], $_->[1], $_->[2]); 
					if (defined($db_positions4)){}else{$db_positions4="";}
					if ( (($c1+$c2+$c3+$c4)== $c_atoms) and (($d1+$d2+$d3+$d4) == $d_bonds) and ($c1!=0) and ($c2!=0) and ($c3!=0) and ($c4!=0)) {
						print OutFile1 "$LipidClass\(1\'\-\[$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\]\, 3\'\-\[$c3\:$d3$db_positions3\/$c4\:$d4$db_positions4\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\]\, 3\'\-\[$c4\:$d4$db_positions4\/$c3\:$d3$db_positions3\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c1\:$d1$db_positions1\/$c3\:$d3$db_positions3\]\, 3\'\-\[$c2\:$d2$db_positions2\/$c4\:$d4$db_positions4\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c1\:$d1$db_positions1\/$c3\:$d3$db_positions3\]\, 3\'\-\[$c4\:$d4$db_positions4\/$c2\:$d2$db_positions2\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c1\:$d1$db_positions1\/$c4\:$d4$db_positions4\]\, 3\'\-\[$c2\:$d2$db_positions2\/$c3\:$d3$db_positions3\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c1\:$d1$db_positions1\/$c4\:$d4$db_positions4\]\, 3\'\-\[$c3\:$d3$db_positions3\/$c2\:$d2$db_positions2\]\)\n";

						print OutFile1 "$LipidClass\(1\'\-\[$c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\]\, 3\'\-\[$c3\:$d3$db_positions3\/$c4\:$d4$db_positions4\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\]\, 3\'\-\[$c4\:$d4$db_positions4\/$c3\:$d3$db_positions3\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c2\:$d2$db_positions2\/$c3\:$d3$db_positions3\]\, 3\'\-\[$c1\:$d1$db_positions1\/$c4\:$d4$db_positions4\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c2\:$d2$db_positions2\/$c3\:$d3$db_positions3\]\, 3\'\-\[$c4\:$d4$db_positions4\/$c1\:$d1$db_positions1\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c2\:$d2$db_positions2\/$c4\:$d4$db_positions4\]\, 3\'\-\[$c1\:$d1$db_positions1\/$c3\:$d3$db_positions3\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c2\:$d2$db_positions2\/$c4\:$d4$db_positions4\]\, 3\'\-\[$c3\:$d3$db_positions3\/$c1\:$d1$db_positions1\]\)\n";

						print OutFile1 "$LipidClass\(1\'\-\[$c3\:$d3$db_positions3\/$c1\:$d1$db_positions1\]\, 3\'\-\[$c2\:$d2$db_positions2\/$c4\:$d4$db_positions4\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c3\:$d3$db_positions3\/$c1\:$d1$db_positions1\]\, 3\'\-\[$c4\:$d4$db_positions4\/$c2\:$d2$db_positions2\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c3\:$d3$db_positions3\/$c2\:$d2$db_positions2\]\, 3\'\-\[$c1\:$d1$db_positions1\/$c4\:$d4$db_positions4\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c3\:$d3$db_positions3\/$c2\:$d2$db_positions2\]\, 3\'\-\[$c4\:$d4$db_positions4\/$c1\:$d1$db_positions1\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c3\:$d3$db_positions3\/$c4\:$d4$db_positions4\]\, 3\'\-\[$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c3\:$d3$db_positions3\/$c4\:$d4$db_positions4\]\, 3\'\-\[$c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\]\)\n";

						print OutFile1 "$LipidClass\(1\'\-\[$c4\:$d4$db_positions4\/$c1\:$d1$db_positions1\]\, 3\'\-\[$c2\:$d2$db_positions2\/$c3\:$d3$db_positions3\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c4\:$d4$db_positions4\/$c1\:$d1$db_positions1\]\, 3\'\-\[$c3\:$d3$db_positions3\/$c2\:$d2$db_positions2\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c4\:$d4$db_positions4\/$c2\:$d2$db_positions2\]\, 3\'\-\[$c1\:$d1$db_positions1\/$c3\:$d3$db_positions3\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c4\:$d4$db_positions4\/$c2\:$d2$db_positions2\]\, 3\'\-\[$c3\:$d3$db_positions3\/$c1\:$d1$db_positions1\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c4\:$d4$db_positions4\/$c3\:$d3$db_positions3\]\, 3\'\-\[$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\]\)\n";
						print OutFile1 "$LipidClass\(1\'\-\[$c4\:$d4$db_positions4\/$c3\:$d3$db_positions3\]\, 3\'\-\[$c2\:$d2$db_positions2\/$c1\:$d1$db_positions1\]\)\n";
					}
				}
			}
		}
	}
}	
	
sub Predict_SN_and_draw_Cer {

	my $total_OH_in_cer; # Total number of hydroxylations in the ceramide molecule. Andrej Drosophila Lipidome data uses this format
	# Breaking lipid name to extract fatty acid chain information
	my @LipidNameSplit;
	if ($LipidName =~ m/\s/){ # Any lipid except sterols
		@LipidNameSplit = split /\s/ , "$LipidName";
		@Chains = split /\// , $LipidNameSplit[1];
	}
	if (($LipidClass =~ m/Cer/) and defined($LipidNameSplit[2])){ # Applicable for Howard's yeastlipidome data. This parsing rule minimises complexity in nomencature. Not recommened 
	# print "\n \$LipidNameSplit\[2\] = $LipidNameSplit[2]\n"; 
		if ($LipidNameSplit[2] == 1){$h2 = 1;}
		elsif ($LipidNameSplit[2] == 2){$h2 = 2;}
		else {print "\nUnknown text found in Lipid name\n" and die "Program Terminating !!";}
	}

	# Splitting string "chain_1" to obtain 1) No. of carbon atoms 2) No. of double bonds
	@_ = split("\:|\;", $Chains[0]);
	($c_atoms,$d_bonds) = ($_[0], $_[1]);
	
	if (defined ($_[2])) { # Applicable for Andrej's Drosophila lpidome data ONLY	

		$total_OH_in_cer = $_[2]; # Total number of hydroxylations in the ceramide molecule. Andrej Drosophila Lipidome data uses this format

		# For Cer/CerPE/HexCer only... 
		if ($LipidClass =~ m/(Cer|PECer|HexCer)/){ # print "Writing possiblities for Cer/CerPE...";<stdin>;
			$LipidClass =~ s/HexCer/GalCer/; # Giving Galactosyl Ceramide as one possibility for HexCer

			foreach (@CerList){
				$c1 = $_; # print "\n\$c1 = $c1..\n";
				$d1 = "0";
				$db_positions1 = "";
				foreach (@FAlist){
					($c2, $d2, $db_positions2) = ($_->[0], $_->[1], $_->[2]); # NoC1 = No. of Carbon atoms in FA 1
					if (defined($db_positions2)) {} else {$db_positions2="";}

					if ((($c1+$c2)== $c_atoms) and ($d1+$d2 == $d_bonds) and ($c1!=0) and ($c2!=0))  {

						if ($total_OH_in_cer == 2) { 
							# If there are two hydroxylations, then two possibilities are taken into consideration
							# 1. Both in head group (3,4)
								print OutFile1 "$LipidClass\(d$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\)\n";
								# Giving Glucosyl Ceramide as second possibility for HexCer
								if ($LipidClass =~ m/GalCer/){
									print OutFile1 "GlcCer\(d$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\)\n";
								}
							# 2. One in head group (3); second in Fatty acid group (at 2 nd carbon atom from COOH end)
								print OutFile1 "$LipidClass\(m$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\(2OH\)\)\n";
								# Giving Glucosyl Ceramide as second possibility for HexCer
								if ($LipidClass =~ m/GalCer/){
									print OutFile1 "GlcCer\(m$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\(2OH\)\)\n";
								}
						}
						elsif ($total_OH_in_cer == 3) {
							# If there are two hydroxylations, then the positions of these hydroxylations are fixed
							# two in Head group ; one in F'Acid chain
								print OutFile1 "$LipidClass\(d$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\(2OH\)\)\n";
								# Giving Glucosyl Ceramide as second possibility for HexCer
								if ($LipidClass =~ m/GalCer/){
									print OutFile1 "GlcCer\(d$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\(2OH\)\)\n";
								}
						}
					}

				}
			}
		}
		else {
			print "\nUnknown LipidClass $LipidClass in molecule $LipidName" and die "Program Terminating";
		}

	}

	else { # print "I am at line 965\n";
		foreach (@CerList){
			$c1 = $_; # print "\n\$c1 = $c1..\n";
			$d1 = "0";
			$db_positions1 = "";
			foreach (@FAlist){
				($c2, $d2, $db_positions2) = ($_->[0], $_->[1], $_->[2]); # NoC1 = No. of Carbon atoms in FA 1
				if (defined($db_positions2)) {} else {$db_positions2="";}

				if ((($c1+$c2)== $c_atoms) and ($d1+$d2 == $d_bonds) and ($c1!=0) and ($c2!=0))  {
					if ($OH_Flag == 0){ # $OH_Flag here is similar to $h1. $OH_Flag and $h1 represent no. of hydroxylations in LongChainBase
						print OutFile1 "$LipidClass\(m$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2$h_position\)\n";
					}
					elsif ($OH_Flag==1){ print OutFile1 "$LipidClass\(d$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2$h_position\)\n";}
					elsif ($OH_Flag==2){ print OutFile1 "$LipidClass\(t$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2$h_position\)\n";}
					elsif ($OH_Flag==3){ 
						print OutFile1 "$LipidClass\(t$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2\)\n";
						print OutFile1 "$LipidClass\(d$c1\:$d1$db_positions1\/$c2\:$d2$db_positions2$h_position\)\n";
					}
				}
			}
		}
	}
}

sub DelDuplicates {
	open FILEA, "./$InputFolderName/OutFile1.txt" or die "\n$! Unable to open OutFile1.txt\n";
	my @list3=<FILEA>;
	close FILEA;
	@list3 = uniq(@list3);
	open FILEB, "> ./$InputFolderName/OutFile1.txt";
	print FILEB @list3;
	close FILEB;
}

sub FindCentroidIsomer {
	# Generating MOL-MOL file for all possible structures using LipidMaps Structure Drawing Tools
	if ($LipidClass =~ m/(TG|DG)/){ $LipidMapsProgramToUse = "GLStrGen.pl";}
	elsif ($LipidClass =~ m/(Cer|PICer|PECer|HexCer|MIPCer|MIP2Cer)/){ $LipidMapsProgramToUse = "SPStrGen.pl";}
	elsif ($LipidClass =~ m/(PC|PA|PS|PI|PE|PG)/){ $LipidMapsProgramToUse = "GPStrGen.pl";}
	elsif ($LipidClass =~ m/CL/){ $LipidMapsProgramToUse = "CLStrGen.pl";} # print "\n\$LipidMapsProgramToUse = $LipidMapsProgramToUse"; <stdin>;
	else {print "\nI could not figure out the LipidMaps Structure Drawing program to use\n" and die "Program Terminating\n";}
	
	`perl ./bin/*_lipidmapstools/bin/$LipidMapsProgramToUse -m AbbrevFileName ./$InputFolderName/OutFile1.txt -r ./$InputFolderName/OutFile1`;
	# `mv OutFile1.sdf ./$InputFolderName/OutFile1.sdf`;
	
	# Converting MOL-MOL structures to SMILES using "OpenBabel : Molecule Conversion package"
	`babel -isdf ./$InputFolderName/OutFile1.sdf -osmi ./$InputFolderName/OutFile1.smi -d -b `;  # print "check 01.smi file";<stdin>;

	# Finding Centroid structure
		# Changing molecule names (because of limitation in ASCII characters that can be used as Molecule names

		open OutFile3, "./$InputFolderName/OutFile1.smi";
		open O2, "> ./$InputFolderName/O2.smi";
		$i=0;
		@VarList1=(); # Array to store SMILES strings
		@VarList2=(); # Array to store Molecule names
		while (<OutFile3>){
			($VarString1, $VarString2) = split("\t", $_);
			$VarString1 = &ModifySMILES($VarString1);
			push (@VarList1, "$VarString1");
			chomp($VarString2);
			push (@VarList2, "$VarString2");
			print O2 "$VarString1 $i\n";
			$i++;
		}
		close O2;  # print "check 02.smi file";<stdin>;
		close OutFile3;	

	# If there are no possible sructures..
	
	if ($i == 0){ 
		print STDERR "\nUnable to generate structures from Fatty Acid possibilities defined in ./$InputFolderName/FA file\n" ; 
		push @UnsupportedSpecies, $LipidName;
#		`rm ./$InputFolderName/OutFile1.sdf ./$InputFolderName/OutFile1.smi ./$InputFolderName/OutFile1.txt ./$InputFolderName/O2.smi`;
	}
	
	# If there is only one possible structure for a given lipid species, take that structure as default centroid structure
	elsif ($i == 1) {$centroid = 0;} 
	
	# If there is only TWO possible structure for a given lipid species, choose first one as centroid structure
	elsif ($i == 2) {$centroid = 0;} 
	# If there is only TWO possible structure for a given lipid species, randomly choose one of them as centroid structure
	# elsif ($i == 2) {$centroid = int(rand(2));} 

	# If there are more than TWO possible structures, find centroid  structure using any of the six similarity scoring methods
	else {
	
		if    ($MethodChoice==1){`perl		./bin/*m1.pl	./$InputFolderName/O2.smi					./$InputFolderName/O2.csv	./$InputFolderName/O2.log`;}
		elsif ($MethodChoice==2){`python	./bin/*m2.py	./$InputFolderName/O2.smi					./$InputFolderName/O2.csv	./$InputFolderName/O2.log`;}
		#elsif ($MethodChoice==3){`perl	./bin/*m3.pl	./$InputFolderName/O2.smi					./$InputFolderName/O2.csv	./$InputFolderName/O2.log`;}
		elsif ($MethodChoice==4){`python	./bin/*m4.py	./$InputFolderName/O2.smi					./$InputFolderName/O2.csv	./$InputFolderName/O2.log`;}
		elsif ($MethodChoice==5){`python	./bin/*m5.py	./$InputFolderName/O2.smi	./bin/mat4		./$InputFolderName/O2.csv	./$InputFolderName/O2.log`;}
		elsif ($MethodChoice==6){`perl		./bin/*m6.pl	./$InputFolderName/O2.smi	./$InputFolderName/O2.aln	./$InputFolderName/O2.csv	./$InputFolderName/O2.log`;
						`rm ./$InputFolderName/O2.aln ./$InputFolderName/O2.aln_joined ./$InputFolderName/O2.aln_joined_rearranged`;}

		# print "check csv file";<stdin>;
		# input *.csv file into centroid.pl program
		# get the center molecule and it's SMILES
		# $centroid =`echo "./$InputFolderName/O2.csv" | perl ./bin/centroid2.pl;`; print "$centroid";<stdin>;
		$centroid =`perl ./bin/centroid2.pl ./$InputFolderName/O2.csv`; # print "\$centroid = $centroid";

		# Deleting temporary files..
		`rm ./$InputFolderName/O2.csv`;
	
	}

	# Deleting temporary files..
	`rm ./$InputFolderName/OutFile1.sdf ./$InputFolderName/OutFile1.smi ./$InputFolderName/OutFile1.txt ./$InputFolderName/O2.smi`;

	if ($i != 0) {&Add2Lists;}
}

sub Add2Lists {

	# To CentroidList
	$LipidName =~ s/(\s|;|:|,)/_/g;
	$_ = scalar(@VarList2);
	push (@CentroidList, "$LipidName\t$VarList2[$centroid]\t$_"); 
	print OutFile2 "$LipidName\t";
	print OutFile2 join ("\t", @VarList2); 
	print OutFile2 "\n";
	
	# To SMILESlist
	push (@LipidNamesList, "$VarList1[$centroid] $LipidName"); 
}

sub WriteLists2Files{

	# Write CentroidList to File
	$CentroidListFileName = "CentroidList\.txt";
	open O3, "> ./$InputFolderName/$CentroidListFileName" or die "Unable to open ./$InputFolderName/$CentroidListFileName file";
	print O3 join ("\n", @CentroidList); 
	close O3;

	# Write SMILESlist to File
	$SMILESlistFileName = "SMILESlist\.smi";
	open O4, "> ./$InputFolderName/$SMILESlistFileName";
	print O4 join ("\n", @LipidNamesList); 
	close O4; 

	# Write ClassList to File
	@ClassList = uniq(@ClassList); # removing duplicate classes
	open O5, "> ./$InputFolderName/ClassList.txt";
	print O5 join ("\n", @ClassList);
	print O5 "\n";
	close O5;
}

__END__

Avatar
Nara C. Marella
Senior Data Scientist & Data Manager

Biomedicine, Informatics, Life Sciences, Algorithms, Advanced Therapy Medicinal Products, Drug Discovery, Research Data Management, Laboratory Information Management Systems, Data Science, Team Management, Natural Sciences, IT Consulting, Statistics, Metabolomics, Proteomics, Transcriptomics, Machine Learning, Artificial Intelligence, Quality Control, Programming, Python, R, Volunteering, Graphic Design, Teaching, Problem Solving

Next
Previous