ATAC-seq主要用来研究染色质的可及性,其中Tn5转座子酶切位点可以用来分析转录因子的活性,TOBIAS 就可以用来进行类似的分析。
fastq-dump --split-e --gzip <input file> -O <output_directory>
wget ftp://ftp.ensembl.org/pub/release-101/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz
wget ftp://ftp.ensembl.org/pub/release-101/gtf/homo_sapiens/Homo_sapiens.GRCh38.101.gtf.gz
trim_galore --paired --fastqc -q 10 --length 30 --stringency 3 -o <output_directory> <READ1> <READ2>
bowtie2-build -f <reference_genome> <bt2-idx>
bowtie2 -x <bt2-idx> -1 <trimmed_1> -2 <trimmed_2> \
-t -q -N 1 -L 25 -X 2000 --no-mixed --no-discordant -p 10 -S <sam>
samtools view -@ 10 -bS <Sam> <Bam>
samtools sort -@ 4 <Bam> -o <Sorted_Bam>
samtools index -b -@ 4 <sorted.bam>
samtools view -b -q 10 <sorted.bam> \
1 2 3 4 5 6 7 8 9 10 11 12 13 \
14 15 16 17 18 19 20 21 22 \
X Y | samtools sort -o <sorted_chr4.bam>
java -Xmx2g -jar picard.jar MarkDuplicates \
I=input.bam \
O= duplicates_removed.bam \
REMOVE_DUPLICATES=true \
M= marked_dup_metrics.txt
java -Xmx2g -jar picard.jar CollectInsertSizeMetrics \ I= duplicates_removed.bam \ O= insert_size_metrics.txt \ H= insert_size_histogram.pdf \ M=0.5
macs2 callpeak -t <name>_chr4.bam -B --nomodel \
--shift -100 --extsize 200 --broad \
-g hs -n <name> --outdir <directory> -f BAM
使用UROPA 注释到最近的基因.
cat <name>_peaks.broadPeak \
|cut -f1-3 | sort -k1,1 -k2,2n \
|bedtools merge -d 5 \
|bedtools subtract -a - -b <blacklist> -A \
|awk '{print $s"\t<name>_"NR}' > <name>.bed\
cat Tcell.bed Bcell.bed |sort -k1,1 -k2,2n | bedtools merge -d 5 -c 4 -o collapse \
> all_merged.bed
uropa --bed all_merged.bed \
--gtf transcripts_chr4.gtf \
--prefix all_merged_annotated \
--show_attributes gene_id gene_name \
--feature_anchor start \
--distance 20000 10000 \
--feature gene \
--outdir <directory> \
--log uropa.log
cut -f 1-6,16-17 all_merged_annotated_finalhits.txt | head -n 1 > all_merged_annotated_header.txt
cut -f 1-6,16-17 all_merged_annotated_finalhits.txt|tail -n +2 > all_merged_annotated.bed
Correct Tn5 sequence bias:
TOBIAS ATACorrect --bam <name>_chr4.bam \
--genome $DATA/genome.fa.gz \
--peaks $OUTPUT/all_merged.bed \
--blacklist $DATA/blacklist.bed \
--outdir $OUTPUT \
--cores 8 \
--prefix <head> &> <head>_atacorrect.log
Calculate footprint scores:
TOBIAS FootprintScores \
--signal <name>_corrected.bw \
--regions all_merged.bed \
--output <name>_footprints.bw \
--cores 8 &> <name>_footprinting.log
Estimate differential TFBS:
TOBIAS BINDetect \
--motifs motifs.jaspar \
--signals Bcell_footprints.bw Tcell_footprints.bw \
--genome genome.fa.gz \
--peaks all_merged_annotated.bed \
--peak_header all_merged_annotated_header.txt \
--cores 8 \
--cond_names Bcell Tcell \
--outdir <directory> &> BTbindetect.log