########### # # GenMedMSc_CM2.2_practical.R # ## A practical to accompany lectures CM2.1 and CM2.2 of the ## Genomic Medicine MSc core module 2 ## by Luke Jostins-Dean, Jennie Astley and Alexandra Martin-Geary ## 24th October 2023 # ##Part 0 - setting up packages and downloading files #There are three example files in the same directory on canvas that you downloaded this R script from #You will need to edit this command to where-ever you downloaded the three example files to on your system: # Copy folder path to clipboard from Files file_prefix <- '~/MSc/' #you will also need to install three packages from CRAN: pedtools, paramlink2 and genpwr, if you don't already have them installed: install.packages('pedtools') install.packages('paramlink2') install.packages('genpwr') ###Part 1 - making and visualising simple pedigrees ### The package pedtools can make pedigrees. Lets load the package: library(pedtools) #the function nuclearPed makes a file representing a simple nuclear family with 5 children #(nch specifies the number of children) nuclear_family_1 <- nuclearPed(nch=5) nuclear_family_1 #this type of format is often called "pedigree" or "ped" format #each individual has an id (here numbered 1 to 7), as well as the ID of that individual's father ("fid") #and mother ("mid"), where "*" or "0" means that that parent isn't present in the pedigree, and the #sex (1 for male, 2 for female). #Pedigree format can also include other information that isn't present here, such as disese status. #in our pedigree everyone other than the mother is male. Lets make another pedigree with a mixture of sexes #among the children, specified using the argument "sex" nuclear_family_2 <- nuclearPed(nch=5,sex=c(1,2,1,2,1)) nuclear_family_2 #much more balanced #the pedtools package comes with a default plotting function for pedigrees. Call it like this: plot(nuclear_family_2) #this creates a pedigree plot, or family tree. #Squares are male, circles are female, the parents are at the top and all the offspring are at the bottom. #next, we are going to put some genotypes onto the pedigree. #Let's put on genotypes assuming both parents are heterozygous (i.e. A/a) #and the offspring range from homozygous reference ("A/A"), heterozygous ("A/a") to homozygous alternative ("a/a"): nuclear_family_2_genotypes <- c("A/a","A/a","a/a","A/A","A/a","A/a","A/A") #pedtools has a "marker" format that holds marker data for a pedigree. #we can use the "marker" function to make a marker file for our mixed-sex pedigree nuclear_family_2_markers <- marker(nuclear_family_2,geno=nuclear_family_2_genotypes) #pedtools can now visualise this if we give it both the pedigree and the marker object plot(nuclear_family_2,marker= nuclear_family_2_markers) #you should now see the genotypes on below the family members #finally, we can set which patients have the disease. #lets assume that this variant we have made up drives a fully penetrant recessive Mendelian disease. #So we make an affection status vector, which is TRUE for individuals who are homozygous a (and thus have the disease) #and FALSE (i.e. does does not have the disease) otherwise. nuclear_family_2_affected <- nuclear_family_2_genotypes == "a/a" #we can then extract out a list of the IDs of patients who have the disease #using a vector subset (i.e. square bracket) operation nuclear_family_2_affected_ids <- nuclear_family_2$ID[nuclear_family_2_affected] #this should only include on individual, as only one individual was "a/a" #lets plot the pedigree, with genotypes and affection statuses, together plot(nuclear_family_2,marker= nuclear_family_2_markers, aff = nuclear_family_2_affected_ids) ##Tasks for part 1 #Task 1.1 (easy) - make another example pedigree with genotypes and affection status, #illustrating a autosomal dominant disease #Task 1.2 (harder) - make another pedigree, this time showing an X-linked recessive disease. #This will require a bit of thinking, as males should be coded as "a" or "A", and female "A/A", "A/a" or "a/a". #You can tell the marker() function to expect an X chromosome by setting the argument chrom="X". ###Part 2 - Reading in bigger pedigree file #we have a ped file of cousins, similar to the previous pedigree. Let's read it in and take a look at it; pedfile <- readPed(paste0(file_prefix,'/Pedigree2.ped')) #NOTE: If this doesn't work, and you get either an "object 'file_prefix' not found" or #a "No such file or directory" error, this probably means you have not set up the #folder and downloaded the files in Part 0 at the top of this script. #take a look at the pedigree we have read in plot(pedfile) #this is a much larger pedigree than the ones we looked at above, and has multiple generations #we also have affection statuses for the individuals afffile <- read.table(paste0(file_prefix,'/Pedigree2.aff')) #let's just check that the individuals are in the same order all(afffile$id == pedfile$ID) #like we in Part 1, lets make a list of affected individuals from this pedigree pedfile_affected_ids <- pedfile$ID[afffile$phenotype == TRUE] #and let's plot the affection status on the pedigree plot(pedfile,aff=pedfile_affected_ids) #lots of cases! #note that in this case, the markers are stored in the ped file itself (rather than a seperate Marker object), #as a list of marker objects. #This is how we can pull out the geneotypes for the first marker: pedfile$MARKERS[[1]] #the pedigree file itself doesn't store details on the markers (their position, their chromosome, etc). #That is stored in a map file, which includes the chromosome, the name of the marker, and the position #of the marker (in Mb): mapfile <- read.table(paste0(file_prefix,'/Pedigree2.map')) head(mapfile) #so, we can see that the first marker: mapfile[1,] #is on chromosome 1, is called "marker1", and is at around 1.43Mb. #We can also plot that first marker on the pedigree: plot(pedfile,marker=pedfile$MARKERS[[1]],aff=pedfile_affected_ids) #this first variant doesn't seem to line up with affection status. #Task 2.1 (easy) - can you also make plots for a marker toward the middle of the chromosome region, and one #from the end. Could any of these plausibly explain the disease? #Task 2.2 (easyish-but-a-bit-thinky,might require the use of Google): this linkage map is only for chrosomome 1. #Does it cover all of chromosome 1? If not, what proportion does it cover? ###Part 3 - Running a linkage scan #in this section, we will try out a parametric linkage analysis #load the paramlink2 library, for doing parametric linkage library(paramlink2) #Can any of the other variants explain the disease? Lets do a parametric linkage scan. #Remember, parametric means that you set model parameters yourself, so we need to decide #on what inheritance model to use. #What models are compatible with this data? #It couldn't be a fully penetrant dominant model, because the disease skips a generation in some cases (e.g. individual 6). #It could be a partially penetrant dominant variant, though. Lets try that. #we set a disease model for paramlink2 using the diseaseModel() function #we need to give it a model (AD for autosomal dominant, AR for autoosmal recessive, and so on) #we can set an autosomal dominant model like this: modAD = diseaseModel(model="AD") #look at the model details: modAD #you can see that function has set some default values. #One thing it has set is the penetrance - it has set this as the penetrance vector (0,1,1) #this means that 0% of A/A, 100% of A/a and 100% of a/a individuals will have the disease #BUT! We know that this doesn't work for our pedigree. So, lets set the penetrance lower, at 80%: modAD_partialPenetrance = diseaseModel("AD", penetrances=c(0,0.8,0.8)) #we can use the pedigree file, the list of affected individuals and the genetic model to run a linkage scan #using the function lod(): lodsAD = lod(pedfile, aff = pedfile_affected_ids, model = modAD_partialPenetrance) #the function is called lod because it generates LOD (log-odds) statistics from the linkage scan #we can plot the LOD scores for each marker along the chromosome plot(lodsAD) #there are no LOD scores greater than 2, so no strong signal there. #Let's try another model. This pedigree could, in theory, be compatible with a recessive model. #set the model to autosomal recessive ("AR"), with default penetrances. modAR = diseaseModel("AR") #and re-run the linkage scan with this model: lodsAR = lod(pedfile, aff = pedfile_affected_ids, model = modAR) #And plot the results plot(lodsAR) #Bingo! We have a big hit (LOD > 6). #Task 3.1 (easyish, think about how to do subsetting in R) - which marker is this hit? Where is it located in the genome? #Task 3.2 (a bit harder) - make a plot of this marker on the pedigree, along with affection status - how well does #it seem to fit? #Tast 3.3 (harder, and thinkier) - what is the allele frequency of this variant in the founders of this pedigree? #Does this seem suprising? Under what circumstances might you see a pathogenic variant to rise to such a high #allele frequency?