|  | 1 | Placeholder | 
                          |  | 2 |  | 
                          |  | 3 | http://sourceforge.net/projects/samtools/files/tabix/ | 
                          |  | 4 | http://vcftools.sourceforge.net/ | 
                          |  | 5 |  | 
                          |  | 6 | https://github.com/molgenis/ngs-utils/blob/master/scripts/vcf-fill-gtc.pl | 
                          |  | 7 |  | 
                          |  | 8 | Super important when running vcf-fill-gtc.pl: | 
                          |  | 9 | #  -ss       : remove sample details! | 
                          |  | 10 |  | 
                          |  | 11 |  | 
                          |  | 12 |  | 
                          |  | 13 |  | 
                          |  | 14 | # | 
                          |  | 15 | # Add vcftools, bgzip and tabix to your PATH (bgzip ships with the tabix package). | 
                          |  | 16 | # | 
                          |  | 17 | export PATH=/Volumes/Users/Software/vcftools_0.1.10/bin/:/Volumes/Users/Software/tabix-0.2.6/:${PATH} | 
                          |  | 18 |  | 
                          |  | 19 | # | 
                          |  | 20 | # Prepare sample VCFs for one batch; e.g. CAR_Batch1_106Samples | 
                          |  | 21 | # | 
                          |  | 22 | cd /Volumes/CardioKitVCFs/OriginalVCFs/CAR_Batch1_106Samples | 
                          |  | 23 | # Fix missing '>' at the end of contig meta-data lines. | 
                          |  | 24 | perl -pi -e 's/(contig=<ID=[^>\n]+)$/$1>/' CAR_*/*.vcf | 
                          |  | 25 | # Sort each sample VCF, keep only records whose FILTER is PASS or '.' (vcf-annotate -H), then bgzip and index with tabix (vcf-merge requires bgzip-compressed, tabix-indexed input). The glob is iterated directly instead of parsing ls output so paths containing whitespace stay intact; the -e test skips the literal pattern when nothing matches. | 
                          |  | 26 | for item in CAR_*/*.vcf; \ | 
                          |  | 27 | do [ -e "$item" ] || continue; echo "Processing $item..."; \ | 
                          |  | 28 | vcf-sort "$item" | vcf-annotate -H > "$item.sorted.filtered"; \ | 
                          |  | 29 | bgzip "$item.sorted.filtered"; \ | 
                          |  | 30 | tabix -p vcf "$item.sorted.filtered.gz"; \ | 
                          |  | 31 | done | 
                          |  | 32 |  | 
                          |  | 33 | # | 
                          |  | 34 | # Merge sample VCFs into one batch VCF. | 
                          |  | 35 | # | 
                          |  | 36 | vcf-merge CAR_*/*.vcf.sorted.filtered.gz | bgzip -c > merged.vcf.gz | 
                          |  | 37 |  | 
                          |  | 38 | # | 
                          |  | 39 | # Create a summary VCF per batch: | 
                          |  | 40 | #  -ss       : remove sample details! | 
                          |  | 41 | #  -fv PASS  : keep only high quality variant calls that pass all filters applied in NextGene. | 
                          |  | 42 | #              Just to be sure: variants should already have been filtered on PASS only in a previous step, | 
                          |  | 43 | #              so this should be redundant here... | 
                          |  | 44 | #  -si       : remove all INFO subfields except for INFO:AN and INFO:AC. | 
                          |  | 45 | #              INFO:AN and INFO:AC were automatically updated by vcf-merge, | 
                          |  | 46 | #              but the others were not and may contain erroneous annotations | 
                          |  | 47 | #              that cause vcf-validator to complain the created VCF is not valid. | 
                          |  | 48 | # | 
                          |  | 49 | ~pneerincx/EclipseWorkspace/ngs_scripts/vcf-fill-gtc.pl -vcfi merged.vcf.gz -vcfo stripped.vcf -ss -fv PASS -si -ll INFO > stripped.vcf.log | 
                          |  | 50 | mv stripped.vcf      ../CAR_Batch1_106Samples.vcf | 
                          |  | 51 | mv stripped.vcf.log  ../CAR_Batch1_106Samples.vcf.log |