GraphViz

Graphviz es una colección de software para representar grafos, muy flexible, pero requiere un cierto esfuerzo de aprendizaje. El paquete del CPAN GraphViz nos ofrece una interfaz sencilla al software original, perdiendo algunas capacidades.

Aquí os muestro el código utilizado para generar este diagrama esquemático de la interfaz entre un factor de transcripción y una molécula de DNA:

Image test_diagram_interface_graph

# graphviz example by Bruno Contreras Moreira
use warnings;
use strict;
use lib "IPC-Run-0.80/lib/";
use lib "GraphViz-2.03/lib/";
use GraphViz;

my %nt_colors = ('A'=>'green','G'=>'orange','C'=>'blue','T'=>'red');

my @interface_contacts = ( # define a set of protein-DNA atomic contacts
'ARG  NH1  A0180 <- 2.89 ->  DG   N7 a0010',  
'ARG  NH2  A0180 <- 2.74 ->  DG   N7 a0010',
'GLU  OE2  A0181 <- 3.30 ->  DC   N4 b0021', 
'ARG  NH2  A0185 <- 2.76 ->  DT   O4 b0020', 
'ARG  NH1  B0180 <- 3.27 ->  DG   O6 b0010', 
'ARG  NH2  B0180 <- 2.74 ->  DG   N7 b0010', 
'GLU  OE2  B0181 <- 2.91 ->  DC   N4 a0021', 
'ARG  NH2  B0185 <- 2.62 ->  DT   O4 a0020'  );

my @DNA_duplex_pairs = ( # define a DNA duplex
'DT a0009 == DA b0024','DG a0010 == DC b0023',
'DT a0011 == DA b0022','DG a0012 == DC b0021',
'DA a0013 == DT b0020','DC a0014 == DG b0019',
'DA a0015 == DT b0018','DT a0016 == DA b0017',
'DA a0017 == DT b0016','DT a0018 == DA b0015',
'DG a0019 == DC b0014','DT a0020 == DA b0013',
'DC a0021 == DG b0012','DA a0022 == DT b0011',
'DC a0023 == DG b0010' );

my $diagram_file = draw_interface_graph('test_diagram',\@interface_contacts,@DNA_duplex_pairs);

print "# $diagram_file\n";

### codigo de la subrutina ###

sub draw_interface_graph
{
	my ($name,$ref_atomic_contacts,@duplex_pairs) = @_;
	
	my $graph_filename = $name."_interface_graph.png";
	
	my ($n_of_nts,$pair,$b,$contact,$nt,$aatom,$aalabel,$rank) = (0);
	my (@strand1,@strand2,@nodes1,@nodes2,%previous_contact,$base);
	
	my $interface = GraphViz->new(name=>'interface',overlap=>'compress');
	
	## -1) separate strands
	foreach $pair (@duplex_pairs)
	{
		if($pair =~ m/D(\w{1}) (\S+) == D(\w{1}) (\S+)/)
		{
			push(@strand1,$1.' '.$2);
			push(@strand2,$3.' '.$4);
			$n_of_nts++;
		}
	}	
	
	## 0) define top and bottom layers, 1 & 4 (2 & 3 are dna strands)
	$interface->add_node('1',rank=>'1',style=>'invis');
	$interface->add_node('4',rank=>'4',style=>'invis');
	
	## 1) define nucleotides in layers 2 and 3, filling indirectly readout residues and adding missing bases
	for($nt=0;$nt<$n_of_nts;$nt++)# 1.1) strand1
	{
		($b,$base) = split(/ /,$strand1[$nt]);
		$nodes1[$nt] = $base;
		$interface->add_node($base,shape=>'hexagon',rank => '2',label=>$b,color=>$nt_colors{$b},style=>'bold');
	}
	
	for($nt=0;$nt<$n_of_nts;$nt++)# 1.2) strand2
	{
		($b,$base) = split(/ /,$strand2[$nt]);
		$nodes2[$nt] = $base;
		$interface->add_node($base,shape=>'hexagon',rank => '3',label=>$b,color=>$nt_colors{$b},style=>'bold');
	}
	
	## 2) invisibly wire the layers
	for($nt=0;$nt<$n_of_nts;$nt++)# 2.1) layer 1->2
	{ 
		$interface->add_edge('1' => $nodes1[$nt],weight=>0,style=>'invis'); # weight necesario para que respete layers
	}
	for($nt=0;$nt<$n_of_nts;$nt++){ $interface->add_edge($nodes2[$nt] => '4',weight=>0,style=>'invis'); }# 2.2) layer 3->4

	## 3) add phosphodiester bonds
	for($nt=1;$nt<$n_of_nts;$nt++)# 3.1) strand1
	{ 
		$interface->add_edge($nodes1[$nt-1] => $nodes1[$nt],constraint=>'false');  
		# constraint necesario para que respete 5'->3'
	}	
	
	my $reverse_n_of_nts = $n_of_nts; # 3.2) strand2
	for($nt=$n_of_nts-1;$nt>=1;$nt--)
	{ 
		$interface->add_edge($nodes2[$nt] => $nodes2[$nt-1],constraint=>'false'); 	
		$reverse_n_of_nts--;
	}

	## 4) add double helix Hbonds
	for($nt=0;$nt<$n_of_nts;$nt++){ $interface->add_edge($nodes1[$nt]=>$nodes2[$nt],style=>'dotted',dir=>'none'); }
	
	## 5) define protein interface atoms and draw contacts: LYS   NZ  A0192 <- 3.05 ->  DG   O6 C0016
	foreach $contact (@{$ref_atomic_contacts})
	{
		my @data = split(/\s+/,$contact);
		$aatom = $data[2].$data[0].".".$data[1];
		$aatom =~ s/([A-Z])0+(.+)/$1$2/;
		$aalabel = $aatom ."=".$data[7]; 
		$base = $data[8]; 
		
		if(grep(/$base/,@nodes1)){ $rank = 1 }
		elsif(grep(/$base/,@nodes2)){ $rank = 4 }
		else{ die "# draw_interface_graph : cannot match base $base\n"; }
			
		$interface->add_node($aatom.$aalabel.$contact,rank=>$rank,shape=>'plaintext',weight=>0,label=>$aalabel,fontsize=>10);
		$interface->add_edge($aatom.$aalabel.$contact=>$base,style=>'solid',constraint=>'false'); 
		
		if($previous_contact{$rank}){ $interface->add_edge($previous_contact{$rank}=>$aatom.$aalabel.$contact,weight=>0,style=>'invis',constraint=>'false');}
		$previous_contact{$rank} = $aatom.$aalabel.$contact;
	}	
	
	## 6) plot graph in both PNG and vectorial PS
	$interface->as_png($graph_filename);
	
	return ($graph_filename);
}

Bruno Contreras-Moreira
http://www.eead.csic.es/compbio