diff --git a/paperless/README.md b/paperless/README.md index 196b402..b27d3c6 100644 --- a/paperless/README.md +++ b/paperless/README.md @@ -9,12 +9,80 @@
Paperless ngx -192.168.0.102:/mnt/pool1/scanner /opt/paperless/consume nfs4 auto,mountvers=4.0 0 0+
192.168.0.102:/mnt/pool1/scanner /opt/paperless/consume nfs auto 0 0
apt install nfs-common\ No newline at end of file +
apt install nfs-common+ +
+#!/bin/sh
+set -x
+
+# Remove blank pages
+/scripts/remove-blank-pages.sh
++
+
+Added /usr/src/paperless/scripts/remove-blank-pages.sh
+
+
+#!/bin/bash
+# set -x -e -o pipefail
+set -e -o pipefail
+export LC_ALL=C
+
+# IN="$1"
+IN="$DOCUMENT_WORKING_PATH"
+
+# Check for PDF format
+TYPE=$(file -b "$IN")
+
+if [ "${TYPE%%,*}" != "PDF document" ]; then
+ >&2 echo "Skipping $IN - non PDF [$TYPE]."
+ exit 0
+fi
+
+# PDF file - proceed
+
+# PAGES=$(pdfinfo "$IN" | grep ^Pages: | tr -dc '0-9')
+PAGES=$(pdfinfo "$IN" | awk '/Pages:/ {print $2}')
+
+>&2 echo Total pages $PAGES
+
+
+# Threshold for HP scanners
+THRESHOLD=1
+# Threshold for Lexmar MC2425
+# THRESHOLD=0.8
+
+
+non_blank() {
+ for i in $(seq 1 $PAGES) ; do
+ PERCENT=$(gs -o - -dFirstPage=${i} -dLastPage=${i} -sDEVICE=ink_cov "${IN}" | grep CMYK | nawk 'BEGIN { sum=0; } {sum += $1 + $2 + $3 + $4;} END { printf "%.5f\n", sum } ')
+ >&2 echo -n "Color-sum in page $i is $PERCENT: "
+ if awk "BEGIN { exit !($PERCENT > $THRESHOLD) }"; then
+ echo $i
+ >&2 echo "Page added to document"
+ else
+ >&2 echo "Page removed from document"
+ fi
+ done
+}
+
+NON_BLANK=$(non_blank)
+
+if [ -n "$NON_BLANK" ]; then
+ NON_BLANK=$(echo $NON_BLANK | tr ' ' ",")
+ qpdf "$IN" --warning-exit-0 --replace-input --pages . $NON_BLANK --
+fi
+
+