#!/bin/bash
##
## Cutadapt identifies any sequences in a given input file that have the
## specified forward and reverse primers, discards any that do not have the
## primers, and removes the primers from the sequences that did have them.
##
## Can specify a given "error" tolerance to match the input primers (0.1 is 10%
## of nucleotides that can mismatch, from single-point mutations or indels), as
## well as minimum and maximum total sequence lengths to include (any sequences
## outside these lengths will be discarded).
##
## Many additional options are available (including different ways to match
## adapters/primers). The ones used here are most useful for in vitro selection
## libraries, but it's a good idea to see what other options are available from
## the Cutadapt manual (see below link for installation).
##
## Can run for both single-end reads and paired-end reads (comment/uncomment
## the one you need).
##
## Will run for the "forward" orientation of primers as well as the "reverse"
## orientation. This is useful for HTS libraries prepared with a blunt end
## ligation protocol, since this can incorporate sequences in either
## orientation with approximately 50% rate for each.
##
## Requires installation of the Cutadapt program, found here:
##   https://cutadapt.readthedocs.io/en/stable/
## Cutadapt Publication: https://doi.org/10.14806/ej.17.1.200

## Abort on use of an unset variable (catches mistyped variable names) and
## report failures inside pipelines. errexit (-e) is deliberately not set: the
## forward and reverse runs are independent, so one failing should not stop
## the other from running.
set -uo pipefail

## Cutadapt Variables
threads=1                       ## Number of threads/cores to use.
error=0.1                       ## Maximum error rate for adapters.
min_length=8                    ## Minimum length of sequences to keep.
max_length=150                  ## Maximum length of sequences to keep.
fwd_primer=NNNNNNNNNNNNNNNNNN   ## Forward primer or P5 Adapter (5'-3').
rev_primer=NNNNNNNNNNNNNNNNNN   ## Reverse primer or P7 Adapter (5'-3').

R1_file_input=/full/path/to/file.fastq                 ## Input File for Single-End Cutadapt (or Paired-End R1)
R2_file_input=/full/path/to/file.fastq                 ## Input File for R2 Paired-End Cutadapt
R1_cut_fwd_file_output=/full/path/to/file.r1.fwd.fastq ## Output File for Forward SE (or PE R1)
R2_cut_fwd_file_output=/full/path/to/file.r2.fwd.fastq ## Output File for Forward PE R2
R1_cut_rev_file_output=/full/path/to/file.r1.rev.fastq ## Output File for Reverse SE (or PE R1)
R2_cut_rev_file_output=/full/path/to/file.r2.rev.fastq ## Output File for Reverse PE R2

## Print the reverse complement of a nucleotide sequence.
## Arguments: $1 - sequence (5'-3').
## Outputs:   reverse complement on stdout. Only A/C/G/T (either case) are
##            complemented; any other character (e.g. N) is kept as-is.
reverse_complement() {
  printf '%s\n' "$1" | tr ACGTacgt TGCAtgca | rev
}

## Run Cutadapt on the single-end input (R1_file_input).
## Arguments: $1 - 5' adapter, $2 - 3' adapter, $3 - output file.
## The adapters are passed as a linked adapter (^FIVE...THREE) with the
## 5' adapter anchored. --discard-untrimmed drops reads in which the primers
## were not found, as described in the header; the paired-end command uses
## the same flag.
run_single_end() {
  local five_p=$1 three_p=$2 out_file=$3
  cutadapt -j "$threads" \
    -a "^${five_p}...${three_p}" \
    -e "$error" -m "$min_length" -M "$max_length" \
    --discard-untrimmed \
    -o "$out_file" "$R1_file_input"
}

## Run Cutadapt on the paired-end inputs (R1_file_input and R2_file_input).
## Arguments: $1 - R1 5' adapter, $2 - R1 3' adapter,
##            $3 - R2 5' adapter, $4 - R2 3' adapter,
##            $5 - R1 output file, $6 - R2 output file.
run_paired_end() {
  local r1_five_p=$1 r1_three_p=$2 r2_five_p=$3 r2_three_p=$4
  local r1_out_file=$5 r2_out_file=$6
  cutadapt -j "$threads" \
    -a "^${r1_five_p}...${r1_three_p}" \
    -A "^${r2_five_p}...${r2_three_p}" \
    -e "$error" -m "$min_length" -M "$max_length" \
    --discard-untrimmed \
    -o "$r1_out_file" -p "$r2_out_file" \
    "$R1_file_input" "$R2_file_input"
}

## Record a failed run and say which one it was on stderr.
## Arguments: $1 - label of the run (e.g. "forward").
failed_runs=0
report_failure() {
  printf 'cutadapt %s run failed\n' "$1" >&2
  failed_runs=$((failed_runs + 1))
}

## Defining the adapters used for identification by Cutadapt.
## Each 3' adapter is the reverse complement of the opposite primer.
five_p_adapt1=$fwd_primer
five_p_adapt2=$rev_primer
three_p_adapt1=$(reverse_complement "$rev_primer")
three_p_adapt2=$(reverse_complement "$fwd_primer")

## If using Paired-End Reads, comment out (##) the Single-End line and
## uncomment the Paired-End line.
## The forward and reverse runs write to their own output files so that the
## reverse run does not overwrite the forward results.

## Single-End Reads Cutadapt (Forward)
run_single_end "$five_p_adapt1" "$three_p_adapt1" "$R1_cut_fwd_file_output" || report_failure "forward"
## Paired-End Reads Cutadapt (Forward)
##run_paired_end "$five_p_adapt1" "$three_p_adapt1" "$five_p_adapt2" "$three_p_adapt2" "$R1_cut_fwd_file_output" "$R2_cut_fwd_file_output" || report_failure "forward"

## Single-End Reads Cutadapt (Reverse)
run_single_end "$five_p_adapt2" "$three_p_adapt2" "$R1_cut_rev_file_output" || report_failure "reverse"
## Paired-End Reads Cutadapt (Reverse)
##run_paired_end "$five_p_adapt2" "$three_p_adapt2" "$five_p_adapt1" "$three_p_adapt1" "$R1_cut_rev_file_output" "$R2_cut_rev_file_output" || report_failure "reverse"

## Make the script's exit status reflect whether every run succeeded.
(( failed_runs == 0 ))