# HG changeset patch # User Steve Losh # Date 1582512645 18000 # Node ID 19cc0bdaf3cc6161c40e490f098d672f8bb6816f # Parent dfec45c7e50024b7ee0ee3926f7c5feb0e7bf1f3 Update diff -r dfec45c7e500 -r 19cc0bdaf3cc README.markdown --- a/README.markdown Sun Feb 23 18:59:29 2020 -0500 +++ b/README.markdown Sun Feb 23 21:50:45 2020 -0500 @@ -960,7 +960,6 @@ set title "READ COUNTS OF INDIVIDUAL FASTQ FILES" set xlabel "READS (MILLIONS)" - # major x tics every 2 million, with 2 minor divisions per major (i.e. minor tics are every 1 million) set xtics 2 set mxtics 2 @@ -981,3 +980,11 @@ ![plot](https://i.imgur.com/YAPXHaQ.png) Neat! + +Hacked together some Awk to remove overrepresented sequences. **But** I don't +think a simple `grep -v` approach works, because the two FASTQ files are +expected to have the paired reads at the same positions in the file. So if we +remove a read from one file but not the other, all the reads after it will be +offset. So we need to remove these reads a bit more carefully (really, we need +tools that process the paired-end reads together). Need to think about this +a little bit more.