#!/usr/bin/env bash
###############################################################################
# README file for how the calling regions for the single-sample HC based
# exome calling pipeline were created.  The following is a two-part process
# of identifying and formatting various input sets of calling regions and then
# secondarily combining and padding said regions.
#
# Required environment:
#   hg19 - directory containing Homo_sapiens_assembly19.dict
#
# Outputs (written to the current directory):
#   gencode.v19.il, mirbase.v20.il, ice_coding_v1_targets.il,
#   exome_calling_regions.v1.interval_list
###############################################################################

# Stop at the first failing command or pipeline stage so that a broken
# download or filter cannot silently yield a truncated interval list.
set -euo pipefail

: "${hg19:?hg19 must point at the directory holding Homo_sapiens_assembly19.dict}"

# Sequence dictionary used as the header of every interval list built below.
seq_dict="${hg19}/Homo_sapiens_assembly19.dict"

# Tilde expansion happens on assignment, so the value must stay unquoted here.
interval_list_tools=~tfennell/bin/IntervalListTools.jar

# Raw target list for the Illumina coding exome bait set.
ice_targets=/seq/references/HybSelOligos/whole_exome_illumina_coding_v1/whole_exome_illumina_coding_v1.Homo_sapiens_assembly19.targets.interval_list

# Scratch directory holding the intermediate tmp.il; removed on any exit path.
work_dir=$(mktemp -d)
tmp_il="${work_dir}/tmp.il"
cleanup() { rm -rf -- "${work_dir:?}"; }
trap cleanup EXIT

#######################################
# Build a unique interval list from interval records read on stdin.
# Copies the sequence dictionary into the scratch file as the header, appends
# the stdin records, then runs IntervalListTools with UNIQUE=true.
# Globals:   seq_dict, tmp_il, interval_list_tools (all read)
# Arguments: $1 - path of the interval list to write
# Inputs:    tab-separated records on stdin: contig, start, end, strand, name
# Returns:   non-zero if the copy, append or java step fails
#######################################
make_interval_list() {
  local out_il=$1
  cp -- "${seq_dict}" "${tmp_il}"
  cat >> "${tmp_il}"
  java -jar "${interval_list_tools}" I="${tmp_il}" O="${out_il}" UNIQUE=true
  rm -f -- "${tmp_il}"
}

# ===============================================
# =============== gencode.v19.il ================
# ===============================================
# -O pins the local name so a re-run overwrites the file instead of saving a
# ".1" copy next to a stale download.
wget -O gencode.v19.annotation.gtf.gz \
  ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz

# Keep CDS records only.  chrM is renamed to MT and the "chr" prefix is
# stripped from the contig column alone, so a name that happens to contain
# "chr" is left untouched.  Field 18 (whitespace-split) is emitted as the
# interval name with its quotes and trailing semicolon removed.
gzip -dc gencode.v19.annotation.gtf.gz \
  | awk 'BEGIN { OFS = "\t" }
      $3 == "CDS" {
        contig = ($1 == "chrM") ? "MT" : $1
        sub(/^chr/, "", contig)
        gene = $18
        gsub(/[";]/, "", gene)
        print contig, $4, $5, $7, gene
      }' \
  | make_interval_list gencode.v19.il

# ===============================================
# =============== mirbase.v20.il ================
# ===============================================
# NOTE(review): the CURRENT directory is not pinned to a release; confirm it
# still serves v20 before regenerating, or switch to a versioned path.
wget -O mirbase.v20.gff ftp://mirbase.org/pub/mirbase/CURRENT/genomes/hsa.gff3

# Keep primary transcripts only.  The contig loses its "chr" prefix, a bare
# "M" becomes "MT", and everything up to and including "Name=" is dropped
# from column 9 to form the interval name.
awk 'BEGIN { OFS = "\t" }
    $3 == "miRNA_primary_transcript" {
      contig = $1
      sub(/^chr/, "", contig)
      if (contig == "M") contig = "MT"
      name = $9
      sub(/.*Name=/, "", name)
      print contig, $4, $5, $7, name
    }' mirbase.v20.gff \
  | make_interval_list mirbase.v20.il

# ===============================================
# ============= illumina_exome.il ===============
# ===============================================
# Drop the existing "@" header lines and rename each target to
# ice_target_<n>, where n is its position among the surviving records.
grep -v '^@' "${ice_targets}" \
  | awk 'BEGIN { OFS = "\t" } { print $1, $2, $3, $4, "ice_target_" FNR }' \
  | make_interval_list ice_coding_v1_targets.il

# ===============================================
# ======= Put it all together and pad it ========
# ===============================================
java -jar "${interval_list_tools}" \
  I=gencode.v19.il \
  I=mirbase.v20.il \
  I=ice_coding_v1_targets.il \
  UNIQUE=true \
  PADDING=50 \
  O=exome_calling_regions.v1.interval_list