From df2ed521c9d341b33e405d63dac6501bcff9b330 Mon Sep 17 00:00:00 2001
From: Borgal
Date: Thu, 18 Jul 2024 14:00:16 +0000
Subject: [PATCH] =?UTF-8?q?paperless/Scripts=20hinzugef=C3=BCgt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 paperless/scripts/pre-consume.sh        |  5 ++
 paperless/scripts/remove-blank-pages.sh | 91 +++++++++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100755 paperless/scripts/pre-consume.sh
 create mode 100755 paperless/scripts/remove-blank-pages.sh

diff --git a/paperless/scripts/pre-consume.sh b/paperless/scripts/pre-consume.sh
new file mode 100755
index 0000000..1436ab0
--- /dev/null
+++ b/paperless/scripts/pre-consume.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+set -x
+
+# Remove blank pages
+/usr/src/paperless/scripts/remove-blank-pages.sh
\ No newline at end of file
diff --git a/paperless/scripts/remove-blank-pages.sh b/paperless/scripts/remove-blank-pages.sh
new file mode 100755
index 0000000..3c2d05c
--- /dev/null
+++ b/paperless/scripts/remove-blank-pages.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+#
+# Paperless pre-consume helper: strips blank pages from a scanned PDF.
+#
+# Input:  DOCUMENT_WORKING_PATH - the document to process; a PDF is
+#         rewritten in place, anything else is left untouched.
+# Tools:  file, pdfinfo, gs (ink_cov device), awk, qpdf.
+# Output: diagnostics on stderr only.
+#
+# A page is kept when the sum of its C, M, Y and K ink coverage values
+# is above THRESHOLD.
+
+#set -x -e -o pipefail
+set -e -o pipefail
+# Fixed locale so the tool output parsed below is predictable
+export LC_ALL=C
+
+#IN="$1"
+IN="$DOCUMENT_WORKING_PATH"
+
+# Check for PDF format
+TYPE=$(file -b "$IN")
+
+if [[ "${TYPE%%,*}" != "PDF document" ]]; then
+  >&2 echo "Skipping $IN - non PDF [$TYPE]."
+  exit 0
+fi
+
+# PDF file - proceed
+
+#PAGES=$(pdfinfo "$IN" | grep ^Pages: | tr -dc '0-9')
+# Anchored so that other pdfinfo lines containing "Pages:" cannot match
+PAGES=$(pdfinfo "$IN" | awk '/^Pages:/ { print $2 }')
+
+>&2 echo "Total pages $PAGES"
+
+# Without a numeric page count there is nothing to iterate over
+if ! [[ "$PAGES" =~ ^[0-9]+$ ]]; then
+  >&2 echo "Skipping $IN - cannot determine page count."
+  exit 0
+fi
+
+# Threshold for HP scanners
+# THRESHOLD=1
+# Threshold for Lexmark MC2425
+THRESHOLD=0.8
+
+#######################################
+# Lists the pages that are not blank.
+# Globals:   IN, PAGES, THRESHOLD (read)
+# Arguments: none
+# Outputs:   numbers of the pages to keep on stdout, one per line;
+#            progress messages on stderr
+# Returns:   0
+#######################################
+non_blank() {
+  local i percent
+  for ((i = 1; i <= PAGES; i++)); do
+    # Sum fields 1-4 of the CMYK line(s) that gs reports for this page.
+    # awk fails when it saw no such line, so a gs problem is not
+    # mistaken for a coverage of 0.
+    if ! percent=$(gs -o - -dFirstPage="$i" -dLastPage="$i" \
+      -sDEVICE=ink_cov "$IN" \
+      | awk '/CMYK/ { sum += $1 + $2 + $3 + $4; seen = 1 }
+          END { if (!seen) exit 1; printf "%.5f\n", sum }'); then
+      # Coverage unknown: keep the page rather than risk losing content
+      >&2 echo "Color-sum in page $i is unknown: Page added to document"
+      echo "$i"
+      continue
+    fi
+    >&2 echo -n "Color-sum in page $i is $percent: "
+    # Values go in via -v instead of being pasted into the awk program
+    if awk -v p="$percent" -v t="$THRESHOLD" \
+      'BEGIN { exit !(p + 0 > t + 0) }'; then
+      echo "$i"
+      >&2 echo "Page added to document"
+    else
+      >&2 echo "Page removed from document"
+    fi
+  done
+  return 0
+}
+
+NON_BLANK=$(non_blank)
+
+# An empty list means every page looked blank; the file is then left as is
+if [[ -n "$NON_BLANK" ]]; then
+  # qpdf expects the page selection as one comma-separated list
+  NON_BLANK=${NON_BLANK//$'\n'/,}
+  qpdf "$IN" --warning-exit-0 --replace-input --pages . "$NON_BLANK" --
+fi