ComputeVM: create_resources.sh

File create_resources.sh, 5.1 KB (added by freerkvandijk, 9 years ago)
Line 
#Set parameters and values
# GSAPUBFTP_PATH  directory on the FTP server that holds the resource bundle
# ROOT            base directory for the resources and the scratch space
# RESDIR          final location of the b37 resources
# TMPDIR          scratch directory used for downloading and unpacking
# BWAVERSION      version suffix of the BWA environment module to load
GSAPUBFTP_PATH="bundle/2.8/b37"
ROOT="/volume/"
# ROOT ends in a slash; strip it before joining so the derived paths do not
# contain a doubled "//".
RESDIR="${ROOT%/}/resources/b37/"
# NOTE(review): TMPDIR is also the conventional scratch-directory variable read
# by tools such as mktemp; if it is already exported in the caller's
# environment, child processes will see this value.
TMPDIR="${ROOT%/}/tmp"
BWAVERSION="0.7.12-goolf-1.7.20"
7
8
###Create tmp directory and cd to it
# Abort when the directory cannot be created or entered: every following step
# works relative to the current directory, so continuing after a failed cd
# would download and unpack into whatever directory the caller was in.
mkdir -p -- "${TMPDIR:?TMPDIR must be set}" || exit 1
cd -- "$TMPDIR" || exit 1
12
13
###wget resources
# Every bundle resource is fetched as <name>.gz followed by its
# <name>.gz.md5 checksum file; the interval files are fetched as-is from a
# second server. A failed download aborts the script, because the later
# checksum, unpack and move steps need the complete set of files.
BUNDLE_URL="ftp://gsapubftp-anonymous@ftp.broadinstitute.org//$GSAPUBFTP_PATH"
INTERVALS_URL="https://molgenis26.target.rug.nl/downloads/intervals"

for resource in \
  human_g1k_v37.dict \
  human_g1k_v37.fasta \
  human_g1k_v37.fasta.fai \
  Mills_and_1000G_gold_standard.indels.b37.vcf \
  Mills_and_1000G_gold_standard.indels.b37.vcf.idx \
  dbsnp_138.b37.vcf \
  dbsnp_138.b37.vcf.idx \
  dbsnp_138.b37.excluding_sites_after_129.vcf \
  dbsnp_138.b37.excluding_sites_after_129.vcf.idx \
  1000G_phase1.indels.b37.vcf \
  1000G_phase1.indels.b37.vcf.idx
do
  for suffix in .gz .gz.md5; do
    wget "$BUNDLE_URL/${resource}${suffix}" || {
      echo "ERROR: download of ${resource}${suffix} failed" >&2
      exit 1
    }
  done
done

for interval_file in \
  testKit1_baits_b37_human_g1k_v37.bed \
  testKit1_baits_b37_human_g1k_v37.interval_list \
  testKit1_exons_b37_human_g1k_v37.bed \
  testKit1_exons_b37_human_g1k_v37.interval_list \
  1000G_phase1.indels_Mills_and_1000G_gold_standard.indels.b37.human_g1k_v37.chr1.intervals
do
  wget "$INTERVALS_URL/$interval_file" || {
    echo "ERROR: download of $interval_file failed" >&2
    exit 1
  }
done
42
43
###Generate correct md5sum files
# write_proper_md5 MD5FILE
#   Rewrites one downloaded checksum file into the "<checksum>  <file>" form
#   that `md5sum -c` can verify from the directory holding the data file.
#   The checksum is the first field of MD5FILE. The file name is taken from
#   MD5FILE's own name minus its ".md5" suffix, i.e. the name the data file
#   was downloaded under, so the result is correct whether or not MD5FILE
#   records a path. (The previous code took the basename of the recorded
#   entry, so the downloaded files presumably carry a path that does not
#   exist locally.)
#   Writes MD5FILE.proper.md5 and removes MD5FILE.
#   Returns non-zero, leaving MD5FILE in place, when no checksum can be read
#   or the new file cannot be written.
write_proper_md5() {
  md5_src=$1
  md5_target=${md5_src##*/}
  md5_target=${md5_target%.md5}

  md5_value=$(awk 'NR == 1 { print $1 }' "$md5_src") || return 1
  if [ -z "$md5_value" ]; then
    echo "ERROR: no checksum found in $md5_src" >&2
    return 1
  fi

  printf '%s  %s\n' "$md5_value" "$md5_target" > "$md5_src.proper.md5" || return 1
  rm -- "$md5_src"
}

for i in *.md5; do
  # An unmatched glob stays literal; skip it.
  [ -e "$i" ] || continue
  # Skip files that were already rewritten (e.g. left over from an earlier run).
  case $i in
    *.proper.md5) continue ;;
  esac

  echo "Generating correct md5sum file for $i"
  write_proper_md5 "$i" || exit 1
done
57
58
###Check md5sums generated files
# verify_proper_md5s
#   Runs `md5sum -c` on every *.proper.md5 file in the current directory.
#   All files are checked, so every mismatch is reported, and the function
#   returns non-zero if at least one check failed.
verify_proper_md5s() {
  md5_failures=0
  for md5_list in *.proper.md5; do
    # An unmatched glob stays literal; skip it.
    [ -e "$md5_list" ] || continue
    echo "Checking md5sum for file $md5_list"
    md5sum -c "$md5_list" || md5_failures=1
  done
  return "$md5_failures"
}

# Stop here on any mismatch so corrupt downloads are never unpacked, indexed
# or moved into the resources directory.
verify_proper_md5s || {
  echo "ERROR: md5sum verification failed" >&2
  exit 1
}
67
68
###Unzip all *.gz files
# gunzip_all
#   Decompresses every *.gz file in the current directory in place.
#   Iterates over the glob directly (not over `ls` output) so file names are
#   never word-split, and returns non-zero at the first file that fails.
gunzip_all() {
  for gz_file in *.gz; do
    # An unmatched glob stays literal; skip it.
    [ -e "$gz_file" ] || continue
    echo "Gunzipping file $gz_file"
    gunzip -- "$gz_file" || return 1
  done
  return 0
}

gunzip_all || {
  echo "ERROR: gunzip failed" >&2
  exit 1
}
77
78
###Index reference genome using BWA
module use /srv/molgenis/.local/easybuild/modules/all/
module avail
module load "BWA/$BWAVERSION" || {
  echo "ERROR: could not load module BWA/$BWAVERSION" >&2
  exit 1
}

# Check for the executable explicitly as well, in case the module command
# does not signal a failed load through its exit status.
command -v bwa >/dev/null 2>&1 || {
  echo "ERROR: bwa not found on PATH after loading BWA/$BWAVERSION" >&2
  exit 1
}

# -p sets the prefix of the generated index files to the fasta file name;
# the .amb/.ann/.bwt/.pac/.sa files are moved to the resources directory later.
bwa index -p human_g1k_v37.fasta -a bwtsw human_g1k_v37.fasta || {
  echo "ERROR: bwa index failed" >&2
  exit 1
}
85
86
###Create folder structure for resources
# Abort if a directory cannot be created, so the move step never runs against
# a missing destination.
for resource_subdir in \
  indices/ \
  intervals/ \
  snp/dbSNP/ \
  snp/1000G/ \
  sv/1000G
do
  mkdir -p -- "${RESDIR:?RESDIR must be set}/$resource_subdir" || {
    echo "ERROR: could not create $RESDIR/$resource_subdir" >&2
    exit 1
  }
done
93
94
###Move all resources to correct folder
# Each group is moved with a single mv call into a destination written with a
# trailing slash, so mv fails when the directory is missing instead of
# renaming a file to the directory's name. Any failure aborts the script so
# that unmoved files are not deleted by the temporary-folder removal below.
: "${RESDIR:?RESDIR must be set}"

# Reference genome, its dictionary, fasta index and BWA index files.
mv -- \
  human_g1k_v37.dict \
  human_g1k_v37.fasta \
  human_g1k_v37.fasta.amb \
  human_g1k_v37.fasta.ann \
  human_g1k_v37.fasta.bwt \
  human_g1k_v37.fasta.pac \
  human_g1k_v37.fasta.sa \
  human_g1k_v37.fasta.fai \
  "$RESDIR/indices/" || {
  echo "ERROR: moving files to $RESDIR/indices/ failed" >&2
  exit 1
}

# Indel call sets.
mv -- \
  Mills_and_1000G_gold_standard.indels.b37.vcf \
  Mills_and_1000G_gold_standard.indels.b37.vcf.idx \
  1000G_phase1.indels.b37.vcf \
  1000G_phase1.indels.b37.vcf.idx \
  "$RESDIR/sv/1000G/" || {
  echo "ERROR: moving files to $RESDIR/sv/1000G/ failed" >&2
  exit 1
}

# dbSNP call sets.
mv -- \
  dbsnp_138.b37.vcf \
  dbsnp_138.b37.vcf.idx \
  dbsnp_138.b37.excluding_sites_after_129.vcf \
  dbsnp_138.b37.excluding_sites_after_129.vcf.idx \
  "$RESDIR/snp/dbSNP/" || {
  echo "ERROR: moving files to $RESDIR/snp/dbSNP/ failed" >&2
  exit 1
}

# Interval files.
mv -- \
  testKit1_baits_b37_human_g1k_v37.bed \
  testKit1_baits_b37_human_g1k_v37.interval_list \
  testKit1_exons_b37_human_g1k_v37.bed \
  testKit1_exons_b37_human_g1k_v37.interval_list \
  1000G_phase1.indels_Mills_and_1000G_gold_standard.indels.b37.human_g1k_v37.chr1.intervals \
  "$RESDIR/intervals/" || {
  echo "ERROR: moving files to $RESDIR/intervals/ failed" >&2
  exit 1
}
117
###Remove temporary folder
# The :? guard aborts instead of running rm with an empty path if TMPDIR is
# unset or empty; quoting and -- keep the path a single, literal operand.
rm -r -- "${TMPDIR:?TMPDIR must be set}"