In order to run the examples in this section you might need to obtain some software and data:
software | source | documentation |
---|---|---|
GAPIT | https://zzlab.net/GAPIT | documentation |
## Curso Tunez - IAMZ
# Read packages
install.packages("devtools")
install.packages("vcfR")
install.packages("tidyverse")
::install_github("jiabowang/GAPIT3",force=TRUE)
devtools
library(GAPIT3)
library(vcfR)
library(tidyverse)
# Set working directory to correct folder, create folder GWAS in each computer,
#and set it as working directory
<- getwd()
curso_iamz_folder setwd(curso_iamz_folder)
# Read genotypic data
# Use of package vcfR to read vcf files, extract relevant information,
# and convert it to formats usable by GAPIT
#file reading
<- read.vcfR("samples12.MISS0.2.MAF0.01.bi.impt.vcf.gz")
gendibar #extract marker information from vcf file
<- extract.gt(gendibar)
gendibar_numeric #show 10 rows and 10 columns, just to check
1:10,1:10]
gendibar_numeric[#converting marker formats to single digits, 0 and 2 for homozygotes, 1 for
#heterozygotes
<- ifelse(gendibar_numeric == "0|0", 0,
gendibar_numeric ifelse(gendibar_numeric == "1|1", 2, 1))
#show 10 rows and 10 columns, just to check
1:10,1:10]
gendibar_numeric[#transpose matrix because GAPIT needs accessions in rows, markers in columns
<- t(gendibar_numeric)
gendibar_numeric #show 10 rows and 10 columns, just to check
1:10,1:10]
gendibar_numeric[#conversion of file from a matrix to a data frame, to allow easy manipulation;
#the result is creating a
#new column with the row names
<- as.data.frame(cbind(rownames(gendibar_numeric),
gendibar_numeric
gendibar_numeric))#new column, number 1, named as "taxa"
colnames(gendibar_numeric)[1] <- "taxa"
#in this case, marker data were read as text, not numbers; next line converts
#marker columns to numbers
2:ncol(gendibar_numeric)] <- sapply(gendibar_numeric[,2:ncol(gendibar_numeric)],
gendibar_numeric[,
as.numeric)#extract accession name from the "taxa" column, from character 13 until the end
#(specific for these data)
$taxa <- substr(gendibar_numeric$taxa,
gendibar_numeric13,
nchar(gendibar_numeric$taxa))
#creation of new file with command from vcfR, which extracts relevant
#information for each marker:
#identifier, chromosome, position
<- getFIX(gendibar)
gendibar_map #reorder columns as required by GAPIT: ID, chromosome, position, and rename
#columns as required by GAPIT
<- as.data.frame(gendibar_map[,c(3,1,2)])
gendibar_map colnames(gendibar_map) <- c("Name", "Chromosome", "Position")
#fill "Name" in file gendibar_map, which was empty, with the colnames of
#gendibar_numeric, skipping the first column (taxa)
$Name <- colnames(gendibar_numeric)[-1]
gendibar_map#GAPIT requires numeric codes for chromosomes; transform codes if needed
$Chromosome <- 2
gendibar_map
# Read phenotypic data
#command read.csv2 reads csv filed with semi colon as column separator; path
#may have to be adjusted
<- read.csv2("Datos_Alturas_cluster.csv", dec = ".")
pheno_data #selects columns with target variables, not essential
<- pheno_data[, c(1:3, 6)]
pheno_data #removes rows (genotypes) with missing data in any variable
<- pheno_data[complete.cases(pheno_data),]
pheno_data #create a new object with intersection of files gendibar_numeric and pheno_data,
#with rows in which $id includes $taxa
<- gendibar_numeric[gendibar_numeric$taxa %in% pheno_data$id,]
gendibar_numeric_2 #same as previous command, but opposite
<- pheno_data[pheno_data$id %in% gendibar_numeric$taxa,]
pheno_data_2 #this is not needed, but explains the next command
summary(gendibar_numeric_2$taxa == pheno_data_2$id)
#match order of files; GAPIT requires same order of genotypic and phenotypic data
<- pheno_data_2[match(gendibar_numeric_2$taxa, pheno_data_2$id),]
pheno_data_2 #change name of first column, to give the same name as recommended in the GAPIT manual
colnames(pheno_data_2)[1:2] <- c("Taxa", "Row_type")
# Run GAPIT
<- "GLM"
GWAS_model <- "height"
trait #Create folders to store GAPIT outputs without overwritting
<- file.path(curso_iamz_folder, GWAS_model, trait)
GWAS_folder dir.create(GWAS_folder, showWarnings = FALSE, recursive = TRUE)
setwd(GWAS_folder)
#running GAPIT
<- GAPIT(
myGAPIT Y = pheno_data_2[,c(1,3)],
GD = gendibar_numeric_2,
GM = gendibar_map,
SNP.MAF=0.05,
PCA.total = 10,
model="GLM",
SNP.fraction = 0.3,
Model.selection = TRUE
)