...
Code Block |
---|
# Create the conda environments used by UTAP and install their packages.
#
# Requires:
#   HOST_MOUNT - directory that contains utap-meta-data.
#   `conda activate` must be usable in the shell that runs these commands.
#
# Fail early (when run as a script) instead of resolving the installer paths
# below under "/" when HOST_MOUNT is unset or empty.
: "${HOST_MOUNT:?HOST_MOUNT must be set to the directory containing utap-meta-data}"

# --- Environment "utap" ------------------------------------------------------
conda create -y --name utap r-essentials r-base=3.3.2
conda activate utap
# Install packages in the utap environment.
# Replace YOUR_CONDA_DIR with your conda installation directory,
# for example: CONDA_DIR=/home/user/miniconda2
export CONDA_DIR=YOUR_CONDA_DIR
# stdout and stderr of the installer are both captured in the .stdout file.
"${HOST_MOUNT}/utap-meta-data/installation/install-conda-packages-transcriptome.sh" \
  > "${HOST_MOUNT}/utap-meta-data/installation/conda-install-transcriptome.stdout" 2>&1
conda deactivate

# --- Environment "utap-chromatin" --------------------------------------------
conda create -y --name utap-chromatin
conda activate utap-chromatin
# Replace YOUR_CONDA_DIR as above (repeated so this section can be run alone).
export CONDA_DIR=YOUR_CONDA_DIR
"${HOST_MOUNT}/utap-meta-data/installation/install-conda-packages-chromatin.sh" \
  > "${HOST_MOUNT}/utap-meta-data/installation/conda-install-chromatin.stdout" 2>&1
conda deactivate

# --- Environment "utap-py35" (snakemake) -------------------------------------
conda create -y -n utap-py35 python=3.5 anaconda
conda activate utap-py35
conda install -y -c bioconda snakemake==3.13.3
conda deactivate

########## OLD COMMANDS - NOT IN USE ############################
# Kept for reference only; commented out so that running or pasting this
# block does not re-create the environments built above.
#
# conda create -y --name utap
# conda activate utap
# conda env create -f utap_environment.yml -n utap
# #Run the file for installation packages on utap environment
# $HOST_MOUNT/utap-meta-data/installation/install-conda-packages.sh
# conda deactivate
# conda create -y -n utap-py35 python=3.5 anaconda
# conda activate utap-py35
# conda install -y -c bioconda snakemake
# conda deactivate
#################################################################
Create genomes:
Extract the genomes to FASTA format and build a STAR index for each genome (the final result requires ~135 GB of disk space, but temporary files created during the build require ~200 GB):
Code Block |
---|
# Extract the UTAP genomes to FASTA, build a STAR index for each of them and
# then remove the intermediate FASTA/.2bit files.
#
# Requires:
#   HOST_MOUNT        - directory that contains utap-meta-data.
# Optional:
#   UTAP_STAR_THREADS - value passed to STAR --runThreadN (default: 30).
#
# The helpers return non-zero instead of exiting, so this block can be pasted
# into an interactive bash session as well as run as a script. A single genome
# can be processed by calling the per-genome helpers directly, e.g.:
#   utap_extract_genome hg19 Homo_sapiens/UCSC/hg19
#   utap_build_star_index hg19 Homo_sapiens/UCSC/hg19

# One entry per genome: "<2bit base name> <directory under genomes/>".
UTAP_GENOMES=(
  'hg19 Homo_sapiens/UCSC/hg19'
  'hg38 Homo_sapiens/UCSC/hg38'
  'mm10 Mus_musculus/UCSC/mm10'
  'danRer10 Danio_rerio/UCSC/danRer10'
  'tair11-araport Arabidopsis_thaliana/ARAPORT/tair11'
  'tair10 Arabidopsis_thaliana/NCBI/tair10'
  'sl3 Solanum_lycopersicum/SGN/sl3'
)

# Print the utap-meta-data directory. Fails when HOST_MOUNT is unset or empty,
# because every path (including the rm targets) would then resolve under "/".
utap_meta_dir() {
  if [[ -z "${HOST_MOUNT:-}" ]]; then
    printf 'HOST_MOUNT must be set to the directory containing utap-meta-data\n' >&2
    return 1
  fi
  printf '%s\n' "${HOST_MOUNT}/utap-meta-data"
}

# Convert one .2bit genome to FASTA.
# Arguments: $1 - 2bit base name, $2 - genome directory relative to genomes/
# NOTE(review): the "gemone_" spelling of the FASTA file name is kept exactly
# as in the original commands.
utap_extract_genome() {
  local name=$1 rel=$2 meta
  meta=$(utap_meta_dir) || return
  "${meta}/softwares/bin/twoBitToFa" \
    "${meta}/2bit_genomes/${name}.2bit" \
    "${meta}/genomes/${rel}/gemone_${name}.fa"
}

# Build the STAR index of one genome in <genome dir>/STAR_index/.
# Arguments: $1 - 2bit base name, $2 - genome directory relative to genomes/
# NOTE(review): the original danRer10 command used
# .../Danio_rerio/UCSC/danRer10/UCSC/danRer10/STAR_index/ (duplicated segment);
# it now sits beside the FASTA file like every other genome - confirm this is
# the location the pipeline expects.
utap_build_star_index() {
  local name=$1 rel=$2 meta
  meta=$(utap_meta_dir) || return
  # Make sure the index directory exists before STAR writes into it.
  mkdir -p -- "${meta}/genomes/${rel}/STAR_index" || return
  "${meta}/softwares/bin/STAR" \
    --runThreadN "${UTAP_STAR_THREADS:-30}" \
    --runMode genomeGenerate \
    --genomeDir "${meta}/genomes/${rel}/STAR_index/" \
    --genomeFastaFiles "${meta}/genomes/${rel}/gemone_${name}.fa"
}

# Extract every genome in UTAP_GENOMES; keeps going after a failure and
# returns non-zero if any extraction failed.
utap_extract_all_genomes() {
  local entry status=0
  for entry in "${UTAP_GENOMES[@]}"; do
    utap_extract_genome "${entry%% *}" "${entry#* }" || status=1
  done
  return "${status}"
}

# Build the STAR index of every genome in UTAP_GENOMES; keeps going after a
# failure and returns non-zero if any build failed.
utap_build_all_star_indexes() {
  local entry status=0
  for entry in "${UTAP_GENOMES[@]}"; do
    utap_build_star_index "${entry%% *}" "${entry#* }" || status=1
  done
  return "${status}"
}

# Delete the extracted FASTA files and all files in 2bit_genomes/.
utap_remove_genome_sources() {
  local entry meta
  meta=$(utap_meta_dir) || return
  for entry in "${UTAP_GENOMES[@]}"; do
    rm -- "${meta}/genomes/${entry#* }/gemone_${entry%% *}.fa"
  done
  rm -- "${meta}/2bit_genomes/"*
}

#Extract genome files:
#=====================
utap_extract_all_genomes

#Build STAR index to genome files:
#==================================
# These commands take ~1 hour per genome.
# The commands run on 30 threads (you can change it with the --runThreadN
# parameter, here via UTAP_STAR_THREADS) and consume RAM memory as following:
#   *hg19:     29918 MB
#   *hg38:     30574 MB
#   *mm10:     27532 MB
#   *danRer10: 23523 MB
#   *tair11:    4301 MB
#   *tair10:    4282 MB
#   *sl3:      17663 MB
#======================================================================================================================================
# After the extracting of the fasta files and building the STAR index, you can
# delete the fasta and .2bit files. The cleanup runs only when every index
# build succeeded, so a failed build does not force a re-extraction.
#==============================================================================================================
utap_build_all_star_indexes && utap_remove_genome_sources
...