35 | | Super important: |
36 | | # -ss : remove sample details! |
| 35 | ''The option -ss is crucial here: it removes all sample details.'' |
| 36 | |
| 37 | Afterwards, be sure to inspect the log file for warnings! |
| 38 | |
| 39 | more stripped.vcf.log |
| 40 | |
| 41 | Full manual: |
| 42 | |
| 43 | Create a summary VCF per batch: |
| 44 | -ss : remove sample details! |
| 45 | -fv PASS : keep only high quality variant calls that pass all filters applied in NextGene. |
| 46 | Just to be sure: variants should already have been filtered on PASS only in a previous step, |
| 47 | so this should be redundant here... |
| 48 | -si : remove all INFO subfields except for INFO:AN and INFO:AC. |
| 49 | INFO:AN and INFO:AC were automatically updated by vcf-merge, |
| 50 | but the others were not and may contain erroneous annotations |
| 51 | that cause vcf-validator to complain that the created VCF is not valid. |
| 52 | -ll : specifies log level, e.g. INFO |
46 | | == Troubleshooting == |
47 | | # |
48 | | # Prepare sample VCFs for one batch; e.g. CAR_Batch1_106Samples |
49 | | # |
50 | | cd /Volumes/CardioKitVCFs/OriginalVCFs/CAR_Batch1_106Samples |
51 | | # Fix missing '>' at the end of contig meta-data lines. |
52 | | perl -pi -e 's/(contig=<ID=[^>\n]+)$/$1>/' CAR_*/*.vcf |
53 | | # Sort, filter on 'PASS', bgzip and index with tabix (vcftools does not work on uncompressed, unindexed VCF files). |
54 | | for item in $(ls CAR_*/*.vcf); \ |
55 | | do echo "Processing $item..."; \ |
56 | | vcf-sort $item | vcf-annotate -H > $item\.sorted\.filtered; \ |
57 | | bgzip $item\.sorted\.filtered; \ |
58 | | tabix -p vcf $item\.sorted\.filtered\.gz; \ |
59 | | done |
| 61 | Prepare sample VCFs for one batch; e.g. CAR_Batch1_106Samples |
| 62 | cd /Volumes/CardioKitVCFs/OriginalVCFs/CAR_Batch1_106Samples |
| 63 | Fix missing '>' at the end of contig meta-data lines. |
| 64 | perl -pi -e 's/(contig=<ID=[^>\n]+)$/$1>/' CAR_*/*.vcf |
| 65 | Sort, filter on 'PASS', bgzip and index with tabix (vcftools does not work on uncompressed, unindexed VCF files). |
| 66 | for item in $(ls CAR_*/*.vcf); \ |
| 67 | do echo "Processing $item..."; \ |
| 68 | vcf-sort $item | vcf-annotate -H > $item\.sorted\.filtered; \ |
| 69 | bgzip $item\.sorted\.filtered; \ |
| 70 | tabix -p vcf $item\.sorted\.filtered\.gz; \ |
| 71 | done |
61 | | # |
62 | | # Merge sample VCFs into one batch VCF. |
63 | | # |
64 | | vcf-merge CAR_*/*.vcf.sorted.filtered.gz | bgzip -c > merged.vcf.gz |
65 | | |
66 | | # |
67 | | # Create a summary VCF per batch: |
68 | | # -ss : remove sample details! |
69 | | # -fv PASS : keep only high quality variant calls that pass all filters applied in NextGene. |
70 | | # Just to be sure: variants should already have been filtered on PASS only in a previous step, |
71 | | # so this should be redundant here... |
72 | | # -si : remove all INFO subfields except for INFO:AN and INFO:AC. |
73 | | # INFO:AN and INFO:AC were automatically updated by vcf-merge, |
74 | | # but the others were not and may contain erroneous annotations |
75 | | # that cause vcf-validator to complain that the created VCF is not valid. |
76 | | # |
77 | | ~pneerincx/EclipseWorkspace/ngs_scripts/vcf-fill-gtc.pl -vcfi merged.vcf.gz -vcfo stripped.vcf -ss -fv PASS -si -ll INFO > stripped.vcf.log |
78 | | mv stripped.vcf ../CAR_Batch1_106Samples.vcf |
79 | | mv stripped.vcf.log ../CAR_Batch1_106Samples.vcf.log |