commit 4b3a8e6e45001a4b5772b56be35e877298bfbf72 Author: John Ahlroos Date: Mon Apr 19 13:14:35 2021 +0300 Added Caruna parser diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bffa164 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.gradle +.idea diff --git a/README.md b/README.md new file mode 100644 index 0000000..d765cda --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +# PDF Parsers + +This project contains different PDF parsers for my personal use. Feel free to copy for personal use. + +### Caruna Invoice Parser +PDF parser to parse Caruna invoices. + +Usage: ``./gradlew :caruna-invoice:run /path/to/pdf`` \ No newline at end of file diff --git a/build.gradle b/build.gradle new file mode 100644 index 0000000..e69915f --- /dev/null +++ b/build.gradle @@ -0,0 +1,16 @@ +group 'com.devsoap.parsers' +version '1.0' + +subprojects { + apply plugin: 'java' + apply plugin: 'application' + sourceCompatibility = JavaVersion.VERSION_16 + targetCompatibility = JavaVersion.VERSION_16 + repositories { + mavenCentral() + } + dependencies { + implementation 'com.itextpdf:itext7-core:7.1.15' + } +} + diff --git a/caruna-invoice/build.gradle b/caruna-invoice/build.gradle new file mode 100644 index 0000000..683f10f --- /dev/null +++ b/caruna-invoice/build.gradle @@ -0,0 +1 @@ +application.mainClass='com.devsoap.parsers.caruna.Parser' \ No newline at end of file diff --git a/caruna-invoice/build/classes/java/main/com/devsoap/parsers/caruna/Parser.class b/caruna-invoice/build/classes/java/main/com/devsoap/parsers/caruna/Parser.class new file mode 100644 index 0000000..0e82e28 Binary files /dev/null and b/caruna-invoice/build/classes/java/main/com/devsoap/parsers/caruna/Parser.class differ diff --git a/caruna-invoice/build/tmp/compileJava/source-classes-mapping.txt b/caruna-invoice/build/tmp/compileJava/source-classes-mapping.txt new file mode 100644 index 0000000..9ed8ac7 --- /dev/null +++ b/caruna-invoice/build/tmp/compileJava/source-classes-mapping.txt @@ -0,0 +1,2 @@ +com/devsoap/parsers/caruna/Parser.java + com.devsoap.parsers.caruna.Parser diff --git a/caruna-invoice/src/main/java/com/devsoap/parsers/caruna/Parser.java b/caruna-invoice/src/main/java/com/devsoap/parsers/caruna/Parser.java new file mode 100644 index 0000000..b7392f5 --- /dev/null +++ b/caruna-invoice/src/main/java/com/devsoap/parsers/caruna/Parser.java @@ -0,0 +1,112 @@ +package com.devsoap.parsers.caruna; + +import com.itextpdf.kernel.pdf.PdfDocument; +import com.itextpdf.kernel.pdf.PdfPage; +import com.itextpdf.kernel.pdf.PdfReader; +import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.text.DateFormat; +import java.text.ParseException; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.time.format.FormatStyle; +import java.time.format.TextStyle; +import java.util.Date; +import java.util.Locale; +import java.util.Objects; +import java.util.Scanner; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class Parser { + + private static final Pattern DATE_RANGE_PATTERN = Pattern.compile("(\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d) - (\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d)"); + private static final Pattern PERUSMAKSU_PATTERN = Pattern.compile("Perusmaksu.* (\\d*,\\d\\d) EUR"); + private static final Pattern P_SIIRTO_PATTERN = Pattern.compile("Päiväsiirto.* (\\d*,\\d\\d) snt.* (\\d*,\\d\\d) EUR"); + private static final Pattern O_SIIRTO_PATTERN = Pattern.compile("Yösiirto.* (\\d*,\\d\\d) snt.* (\\d*,\\d\\d) EUR"); + private static final Pattern TAX_PATTERN = Pattern.compile("Sähkövero.* (\\d*,\\d\\d) EUR"); + + private static final Locale FI_LOCALE = new Locale("FI", "fi"); + private static final DateTimeFormatter FI_DATE = DateTimeFormatter + .ofLocalizedDate(FormatStyle.SHORT) + .withLocale(FI_LOCALE); + + public static void main(String[] args) { + var filename = args[0]; + var file = Paths.get(filename); + try(var reader = new PdfReader(file.toFile())) { + var document = new PdfDocument(reader); + var page2 = document.getPage(2); + var text = PdfTextExtractor.getTextFromPage(page2); + + var scanner = new Scanner(text); + var month = ""; + var basicPay = 0.0; + var transferDayPrice = 0.0; + var transferDayTotal = 0.0; + var transforDayKwh = 0L; + var transferNightPrice = 0.0; + var transferNightTotal = 0.0; + var transforNightKwh = 0L; + var tax = 0.0; + + System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " + + "(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" + + ",Yösiirto (EUR),Vero"); + + while(scanner.hasNextLine()) { + var line = scanner.nextLine(); + if(DATE_RANGE_PATTERN.asPredicate().test(line)) { + if(!Objects.equals(month, "")) { + var csv = String.format("%s,,%.02f,,,,,%d, %.02f,%d, %.02f, %.02f", + month, basicPay, transforDayKwh, transferDayTotal, transforNightKwh, transferNightTotal, tax); + System.out.println(csv); + } + + var matcher = DATE_RANGE_PATTERN.matcher(line); + while(matcher.find()) { + month = LocalDate.from( FI_DATE.parse(matcher.group(1))) + .getMonth() + .getDisplayName(TextStyle.FULL, new Locale("FI","fi")); + month = month.substring(0,1).toUpperCase() + month.substring(1, month.length()-2); + } + } else if(PERUSMAKSU_PATTERN.asPredicate().test(line)) { + var matcher = PERUSMAKSU_PATTERN.matcher(line); + while (matcher.find()) { + basicPay = Double.parseDouble(matcher.group(1).replace(",", ".")); + } + } else if(P_SIIRTO_PATTERN.asPredicate().test(line)) { + var matcher = P_SIIRTO_PATTERN.matcher(line); + while (matcher.find()) { + transferDayPrice = Double.parseDouble(matcher.group(1).replace(",", ".")) / 100.0; + transferDayTotal = Double.parseDouble(matcher.group(2).replace(",", ".")); + transforDayKwh = Math.round(transferDayTotal / transferDayPrice); + } + } else if(O_SIIRTO_PATTERN.asPredicate().test(line)) { + var matcher = O_SIIRTO_PATTERN.matcher(line); + while (matcher.find()) { + transferNightPrice = Double.parseDouble(matcher.group(1).replace(",", ".")) / 100.0; + transferNightTotal = Double.parseDouble(matcher.group(2).replace(",", ".")); + transforNightKwh = Math.round(transferNightTotal / transferNightPrice); + } + } else if(TAX_PATTERN.asPredicate().test(line)) { + var matcher = TAX_PATTERN.matcher(line); + while (matcher.find()) { + tax = Double.parseDouble(matcher.group(1).replace(",", ".")); + } + } + } + var csv = String.format("%s,,%.02f,,,,,%d, %.02f,%d, %.02f, %.02f", + month, basicPay, transforDayKwh, transferDayTotal, transforNightKwh, transferNightTotal, tax); + System.out.println(csv); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } +} diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000..e708b1c Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..f371643 --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,5 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-7.0-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 0000000..4f906e0 --- /dev/null +++ b/gradlew @@ -0,0 +1,185 @@ +#!/usr/bin/env sh + +# +# Copyright 2015 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=`expr $i + 1` + done + case $i in + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=`save "$@"` + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 0000000..107acd3 --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,89 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto execute + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 0000000..b9d1d89 --- /dev/null +++ b/settings.gradle @@ -0,0 +1,3 @@ +rootProject.name = 'pdf-parsers' +include 'caruna-invoice' +