This document describes how SNP genotypes from 135 landraces of the Spanish Barley Core Collection (SBCC) grouped into 4 subpopulations, were used to obtain XtX statistics by CP Cantalapiedra.

Rebuilding data files to obtain subpopulation-based bayenv input files

Files for covariance matrix

cd xtx_subpops
./bayenv_lines_to_subpops.py SBCC_order.subpops.tab ../SBCC_order.txt ../SBCC_nr_SNPs.tsv show \
> SBCC_nr_subpops.tsv 2> SBCC_nr_subpops.err

Files for bayenv XtX analysis

cd xtx_subpops
./bayenv_lines_to_subpops.py SBCC_order.subpops.tab ../SBCC_order.txt ../SBCC_9K_SNPs.tsv show \
> SBCC_9K_subpops.tsv 2> SBCC_9K_subpops.err

NOTE: the “show” option is used to include in the output file those SNPs with missing data in some of the genotypes. The opposite could be obtained using “hide” as option.

Running bayenv2 XtX analyses

Following the work of Rusell et al 2016, 200K MCMC steps are performed.

First, a dummy climate file is required in order to run bayenv:

cd xtx_subpops
cat ../SBCC_environfile.tsv | head -1 | awk '{print $1"\t"$2"\t"$3"\t"$4}' > envfile.dummy

A covariance matrix is also required. Here it is computed 10 times:

cd xtx_subpops
mkdir -p matrices/raw/

# Create 10 covariance matrices, this will take some time
for i in {1..10}; do
    rnd=$(perl -e 'printf("%05d",rand(99999))');
    echo $rnd >> bayenv.covar.rnd_seeds
    mkdir _job$i; cd _job$i; ln -s ../SBCC_nr_subpops.tsv .; \
    ../soft/bayenv2/bayenv2 -i SBCC_nr_subpops.tsv -p 4 -k 100000 -r $rnd \
    > ../SBCC_nr_subpops_matrix_it100K_$i.out & 
    cd ..;
done;

# We can now put the resulting files away:
rm -rf _job*
gzip SBCC_nr_subpops_matrix_it100K_*
mv SBCC_nr_subpops_matrix_it100K_* matrices/raw/

# Extract final matrix from each replicate
for i in {1..10}; do 
  zcat matrices/raw/SBCC_nr_subpops_matrix_it100K_$i.out.gz | \
  perl -lne 'if(/ITER = 100000/){$ok=1}elsif($ok){ print }' \
  | sed '/^$/d' > matrices/SBCC_nr_subpops_matrix_$i.txt
done

A single average matrix can be computed as follows:

setwd(xtx_subpops)

# read all final matrices
m1 = as.matrix( read.table(file="matrices/raw/SBCC_nr_subpops_matrix_1.txt", header=F) )
m2 = as.matrix( read.table(file="matrices/SBCC_nr_subpops_matrix_2.txt", header=F) )
m3 = as.matrix( read.table(file="matrices/SBCC_nr_subpops_matrix_3.txt", header=F) )
m4 = as.matrix( read.table(file="matrices/SBCC_nr_subpops_matrix_4.txt", header=F) )
m5 = as.matrix( read.table(file="matrices/SBCC_nr_subpops_matrix_5.txt", header=F) )
m6 = as.matrix( read.table(file="matrices/SBCC_nr_subpops_matrix_6.txt", header=F) )
m7 = as.matrix( read.table(file="matrices/SBCC_nr_subpops_matrix_7.txt", header=F) )
m8 = as.matrix( read.table(file="matrices/SBCC_nr_subpops_matrix_8.txt", header=F) )
m9 = as.matrix( read.table(file="matrices/SBCC_nr_subpops_matrix_9.txt", header=F) )
m10 = as.matrix( read.table(file="matrices/SBCC_nr_subpops_matrix_10.txt", header=F) )

# make a list of matrices and get mean as explained in:
mat_list = list( m1, m2, m3, m4, m5, m6, m7, m8, m9, m10 )
mean_mat = apply(simplify2array(mat_list), c(1,2), mean)

# write resulting mean cov matrix
write.table(mean_mat,file="matrices/SBCC_nr_subpops_matrix_mean.txt",
            sep="\t",row.names=F,col.names=F,quote=F)

This should create file xtx_subpops/matrices/SBCC_nr_subpops_matrix_mean.txt

Now three replicates of bayenv XTX will be run:

cd xtx_subpops

# 1st replicate run
rnd=$(perl -e 'printf("%05d",rand(99999))');
echo "$rnd" > SBCC_9K_subpops.tsv.xtx.rep1.rnd

../soft/bayenv2/calc_xtx_parallel.pl 5 bayenv2 -X -t -i SBCC_9K_subpops.tsv -p 4 \
-e envfile.dummy -n 1 \
-m matrices/SBCC_nr_subpops_matrix_mean.txt \
-k 200000 -r "$rnd" -o SBCC_9K_subpops.tsv.xtx.rep1 -c \
> SBCC_9K_subpops.tsv.xtx.rep1.stdout 2>&1;

# 2nd run
rnd=$(perl -e 'printf("%05d",rand(99999))');
echo "$rnd" > SBCC_9K_subpops.tsv.xtx.rep2.rnd

../soft/bayenv2/calc_xtx_parallel.pl 5 bayenv2 -X -t -i SBCC_9K_subpops.tsv -p 4 \
-e envfile.dummy -n 1 \
-m matrices/SBCC_nr_subpops_matrix_mean.txt \
-k 200000 -r "$rnd" -o SBCC_9K_subpops.tsv.xtx.rep2 -c \
> SBCC_9K_subpops.tsv.xtx.rep2.stdout 2>&1;

# 3rd run
rnd=$(perl -e 'printf("%05d",rand(99999))');
echo "$rnd" > SBCC_9K_subpops.tsv.xtx.rep3.rnd

../soft/bayenv2/calc_xtx_parallel.pl 5 bayenv2 -X -t -i SBCC_9K_subpops.tsv -p 4 \
-e envfile.dummy -n 1 \
-m matrices/SBCC_nr_subpops_matrix_mean.txt \
-k 200000 -r "$rnd" -o SBCC_9K_subpops.tsv.xtx.rep3 -c \
> SBCC_9K_subpops.tsv.xtx.rep3.stdout 2>&1;

# compress outfiles
gzip SBCC_9K_subpops.tsv.xtx.rep1
gzip SBCC_9K_subpops.tsv.xtx.rep2
gzip SBCC_9K_subpops.tsv.xtx.rep3

Let’s inspect one of the XtX bayenv files:

cd xtx_subpops
zcat SBCC_9K_subpops.tsv.xtx.rep1.gz | head -6
snp_batch0000   6.1464e+00
snp_batch0001   6.0930e+00
snp_batch0002   7.1759e+00
snp_batch0003   5.9965e+00
snp_batch0004   5.2972e+00
snp_batch0005   5.8048e+00

Annotating and exporting XtX results

cd xtx_subpops

# add SNP name to XtX results
perl xtx2table.pl SBCC_9K_subpops.tsv.xtx.rep1.gz ../SBCC_9K_SNPs.annot.tsv > \
  SBCC_9K_subpops.tsv.xtx.rep1.tsv
perl xtx2table.pl SBCC_9K_subpops.tsv.xtx.rep2.gz ../SBCC_9K_SNPs.annot.tsv > \
  SBCC_9K_subpops.tsv.xtx.rep2.tsv
perl xtx2table.pl SBCC_9K_subpops.tsv.xtx.rep3.gz ../SBCC_9K_SNPs.annot.tsv > \
  SBCC_9K_subpops.tsv.xtx.rep3.tsv

head -6 SBCC_9K_subpops.tsv.xtx.rep1.tsv

gzip -f SBCC_9K_subpops.tsv.xtx.rep1.tsv
gzip -f SBCC_9K_subpops.tsv.xtx.rep2.tsv
gzip -f SBCC_9K_subpops.tsv.xtx.rep3.tsv


# add cM & bp map positions (./obtain_map.R IN OUT MAP)

Rscript ./obtain_map.R SBCC_9K_subpops.tsv.xtx.rep1.tsv.gz \
  SBCC_9K_subpops.tsv.xtx.rep1.bp.tsv ../raw/9920_SNPs_SBCC_bp_map2017.curated.tsv 
Rscript ./obtain_map.R SBCC_9K_subpops.tsv.xtx.rep2.tsv.gz \
  SBCC_9K_subpops.tsv.xtx.rep2.bp.tsv ../raw/9920_SNPs_SBCC_bp_map2017.curated.tsv 
Rscript ./obtain_map.R SBCC_9K_subpops.tsv.xtx.rep3.tsv.gz \
  SBCC_9K_subpops.tsv.xtx.rep3.bp.tsv ../raw/9920_SNPs_SBCC_bp_map2017.curated.tsv 

Rscript ./obtain_map.R SBCC_9K_subpops.tsv.xtx.rep1.tsv.gz \
  SBCC_9K_subpops.tsv.xtx.rep1.cM.tsv ../raw/9920_SNPs_SBCC_cM_map2017.curated.tsv 
Rscript ./obtain_map.R SBCC_9K_subpops.tsv.xtx.rep2.tsv.gz \
  SBCC_9K_subpops.tsv.xtx.rep2.cM.tsv ../raw/9920_SNPs_SBCC_cM_map2017.curated.tsv 
Rscript ./obtain_map.R SBCC_9K_subpops.tsv.xtx.rep3.tsv.gz \
  SBCC_9K_subpops.tsv.xtx.rep3.cM.tsv ../raw/9920_SNPs_SBCC_cM_map2017.curated.tsv

# join all results into a single file
join -t $'\t' SBCC_9K_subpops.tsv.xtx.rep1.bp.tsv SBCC_9K_subpops.tsv.xtx.rep2.bp.tsv \
  | join -t $'\t' - SBCC_9K_subpops.tsv.xtx.rep3.bp.tsv \
  | awk '{print $1"\t"$3"\t"$4"\t"$2"\t"$5"\t"$8}' \
  >  SBCC_9K_subpops.tsv.xtx.bp.tsv 

join -t $'\t' SBCC_9K_subpops.tsv.xtx.rep1.cM.tsv SBCC_9K_subpops.tsv.xtx.rep2.cM.tsv \
  | join -t $'\t' - SBCC_9K_subpops.tsv.xtx.rep3.cM.tsv \
  | awk '{print $1"\t"$3"\t"$4"\t"$2"\t"$5"\t"$8}' \
  >  SBCC_9K_subpops.tsv.xtx.cM.tsv

head -6 SBCC_9K_subpops.tsv.xtx.bp.tsv

head -6 SBCC_9K_subpops.tsv.xtx.cM.tsv
SNPidentifier   XtX
BOPA1_2511-533  6.15
BOPA1_3107-422  6.09
BOPA1_8670-388  7.18
BOPA2_12_10420  6.00
BOPA2_12_30653  5.30
pdf 
  2 
pdf 
  2 
pdf 
  2 
pdf 
  2 
pdf 
  2 
pdf 
  2 
SNPidentifier   chr bp  XtX XtX XtX
3258773|F|0 1   47954   3.77    3.78    3.54
3256790|F|0 1   51877   3.62    3.75    3.59
BOPA2_12_10420  1   71604   6   6.15    5.99
BOPA1_3107-422  1   71803   6.09    6.64    6.5
SCRI_RS_204276  1   248471  4.76    4.48    4.63
SNPidentifier   chr cM  XtX XtX XtX
3255561|F|0 1   0   5.99    6.3 5.97
3256686|F|0 1   0   5.85    6.17    6.02
3256733|F|0 1   0   6.05    5.96    5.8
3256790|F|0 1   0   3.62    3.75    3.59
3258773|F|0 1   0   3.77    3.78    3.54

The final results file of tis protocol are SBCC_9K_subpops.tsv.xtx.bp.tsv and SBCC_9K_subpops.tsv.xtx.cM.tsv