1
0
Fork 0

Added Caruna parser

This commit is contained in:
John Ahlroos 2021-04-19 13:14:35 +03:00
commit 4b3a8e6e45
Signed by: john
GPG Key ID: 258D0F70DB84CD5D
12 changed files with 423 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
.gradle
.idea

8
README.md Normal file
View File

@ -0,0 +1,8 @@
# PDF Parsers
This project contains different PDF parsers for my personal use. Feel free to copy for personal use.
### Caruna Invoice Parser
PDF parser to parse Caruna invoices.
Usage: ``./gradlew :caruna-invoice:run --args="/path/to/pdf"``

16
build.gradle Normal file
View File

@ -0,0 +1,16 @@
// Maven-style group and version of the root project.
group 'com.devsoap.parsers'
version '1.0'
// Settings applied to every subproject of this build.
subprojects {
apply plugin: 'java'
// The application plugin provides the `run` task used to launch a subproject's main class.
apply plugin: 'application'
// Source is compiled as, and class files are targeted at, Java 16.
sourceCompatibility = JavaVersion.VERSION_16
targetCompatibility = JavaVersion.VERSION_16
repositories {
mavenCentral()
}
dependencies {
// iText 7 is available on the compile and runtime classpath of every subproject.
implementation 'com.itextpdf:itext7-core:7.1.15'
}
}

View File

@ -0,0 +1 @@
// Fully qualified class whose main method the application plugin's `run` task launches.
application.mainClass='com.devsoap.parsers.caruna.Parser'

View File

@ -0,0 +1,2 @@
com/devsoap/parsers/caruna/Parser.java
com.devsoap.parsers.caruna.Parser

View File

@ -0,0 +1,112 @@
package com.devsoap.parsers.caruna;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.DateFormat;
import java.text.ParseException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.FormatStyle;
import java.time.format.TextStyle;
import java.util.Date;
import java.util.Locale;
import java.util.Objects;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Command-line tool that reads page 2 of a Caruna invoice PDF and prints the transfer charges it
 * finds there to standard output as CSV.
 *
 * <p>A CSV row is emitted every time a new date-range line is encountered (for the values collected
 * since the previous one) and once more at the end of the page.
 *
 * <p>Usage: {@code Parser /path/to/invoice.pdf}
 */
public class Parser {

    private static final Pattern DATE_RANGE_PATTERN = Pattern.compile("(\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d) - (\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d)");
    private static final Pattern PERUSMAKSU_PATTERN = Pattern.compile("Perusmaksu.* (\\d*,\\d\\d) EUR");
    private static final Pattern P_SIIRTO_PATTERN = Pattern.compile("Päiväsiirto.* (\\d*,\\d\\d) snt.* (\\d*,\\d\\d) EUR");
    private static final Pattern O_SIIRTO_PATTERN = Pattern.compile("Yösiirto.* (\\d*,\\d\\d) snt.* (\\d*,\\d\\d) EUR");
    private static final Pattern TAX_PATTERN = Pattern.compile("Sähkövero.* (\\d*,\\d\\d) EUR");

    private static final Locale FI_LOCALE = new Locale("FI", "fi");
    private static final DateTimeFormatter FI_DATE = DateTimeFormatter
            .ofLocalizedDate(FormatStyle.SHORT)
            .withLocale(FI_LOCALE);

    /** Column titles; the energy columns are declared here but left empty by this parser. */
    private static final String CSV_HEADER =
            "Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia "
            + "(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)"
            + ",Yösiirto (EUR),Vero";

    /** Row layout matching {@link #CSV_HEADER}: 12 fields, 5 of them always empty. */
    private static final String CSV_ROW_FORMAT = "%s,,%.02f,,,,,%d, %.02f,%d, %.02f, %.02f";

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} is the path of the invoice PDF to parse
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Usage: Parser /path/to/invoice.pdf");
            System.exit(1);
            return;
        }
        var file = Paths.get(args[0]);
        // The document is a resource of its own so that it is released as well as the reader.
        try (var reader = new PdfReader(file.toFile());
             var document = new PdfDocument(reader)) {
            printCsv(PdfTextExtractor.getTextFromPage(document.getPage(2)));
        } catch (IOException e) {
            // Covers FileNotFoundException too, which is a subclass of IOException.
            System.err.println("Could not read invoice '" + file + "': " + e);
            System.exit(1);
        }
    }

    /**
     * Scans the extracted page text line by line and prints the CSV header followed by the rows.
     *
     * <p>Values are not reset when a new date range starts, so a figure that is missing for one
     * period keeps the value parsed for the previous one.
     *
     * @param text plain text of the invoice page
     */
    private static void printCsv(String text) {
        var month = "";
        var basicPay = 0.0;
        var transferDayTotal = 0.0;
        var transferDayKwh = 0L;
        var transferNightTotal = 0.0;
        var transferNightKwh = 0L;
        var tax = 0.0;

        System.out.println(CSV_HEADER);
        try (var scanner = new Scanner(text)) {
            while (scanner.hasNextLine()) {
                var line = scanner.nextLine();
                var dateRange = DATE_RANGE_PATTERN.matcher(line);
                var basicFee = PERUSMAKSU_PATTERN.matcher(line);
                var dayTransfer = P_SIIRTO_PATTERN.matcher(line);
                var nightTransfer = O_SIIRTO_PATTERN.matcher(line);
                var electricityTax = TAX_PATTERN.matcher(line);

                // Only the first pattern that matches a line is applied. Within that pattern the
                // last match on the line wins, hence the do/while loops.
                if (dateRange.find()) {
                    if (!month.isEmpty()) {
                        // A new period starts: flush what was collected for the previous one.
                        System.out.println(formatRow(month, basicPay, transferDayKwh, transferDayTotal,
                                transferNightKwh, transferNightTotal, tax));
                    }
                    do {
                        month = monthName(dateRange.group(1));
                    } while (dateRange.find());
                } else if (basicFee.find()) {
                    do {
                        basicPay = parseAmount(basicFee.group(1));
                    } while (basicFee.find());
                } else if (dayTransfer.find()) {
                    do {
                        // The unit price is printed in cents (snt) per kWh, the total in euros.
                        var unitPrice = parseAmount(dayTransfer.group(1)) / 100.0;
                        transferDayTotal = parseAmount(dayTransfer.group(2));
                        transferDayKwh = estimateKwh(transferDayTotal, unitPrice);
                    } while (dayTransfer.find());
                } else if (nightTransfer.find()) {
                    do {
                        var unitPrice = parseAmount(nightTransfer.group(1)) / 100.0;
                        transferNightTotal = parseAmount(nightTransfer.group(2));
                        transferNightKwh = estimateKwh(transferNightTotal, unitPrice);
                    } while (nightTransfer.find());
                } else if (electricityTax.find()) {
                    do {
                        tax = parseAmount(electricityTax.group(1));
                    } while (electricityTax.find());
                }
            }
        }
        System.out.println(formatRow(month, basicPay, transferDayKwh, transferDayTotal,
                transferNightKwh, transferNightTotal, tax));
    }

    /** Converts an amount written with a decimal comma (e.g. {@code 12,34}) to a double. */
    private static double parseAmount(String amount) {
        return Double.parseDouble(amount.replace(",", "."));
    }

    /**
     * Derives the consumed energy from a total and a unit price, rounded to whole kWh.
     *
     * @return {@code total / unitPrice} rounded, or 0 when the unit price is zero (dividing would
     *     otherwise yield infinity, which rounds to {@code Long.MAX_VALUE})
     */
    private static long estimateKwh(double total, double unitPrice) {
        return unitPrice == 0.0 ? 0L : Math.round(total / unitPrice);
    }

    /**
     * Returns the capitalised Finnish month name for a date such as {@code 1.3.2021}.
     *
     * @param date date in the Finnish short format understood by {@link #FI_DATE}
     */
    private static String monthName(String date) {
        var name = LocalDate.parse(date, FI_DATE)
                .getMonth()
                .getDisplayName(TextStyle.FULL, FI_LOCALE);
        // The full style can yield the inflected form ("tammikuuta"); drop the "ta" ending to get
        // the base form, but only when it is actually there.
        if (name.endsWith("ta")) {
            name = name.substring(0, name.length() - 2);
        }
        return name.substring(0, 1).toUpperCase(FI_LOCALE) + name.substring(1);
    }

    /**
     * Formats one CSV row.
     *
     * <p>{@link Locale#ROOT} is used so that decimals are always written with a dot; a default
     * locale with a decimal comma would otherwise add extra separators to the comma-separated row.
     */
    private static String formatRow(String month, double basicPay, long dayKwh, double dayTotal,
                                    long nightKwh, double nightTotal, double tax) {
        return String.format(Locale.ROOT, CSV_ROW_FORMAT,
                month, basicPay, dayKwh, dayTotal, nightKwh, nightTotal, tax);
    }
}

BIN
gradle/wrapper/gradle-wrapper.jar vendored Normal file

Binary file not shown.

View File

@ -0,0 +1,5 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-7.0-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

185
gradlew vendored Executable file
View File

@ -0,0 +1,185 @@
#!/usr/bin/env sh
#
# Copyright 2015 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##############################################################################
##
## Gradle start up script for UN*X
##
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
PRG="$0"
# Need this for relative symlinks.
while [ -h "$PRG" ] ; do
ls=`ls -ld "$PRG"`
link=`expr "$ls" : '.*-> \(.*\)$'`
if expr "$link" : '/.*' > /dev/null; then
PRG="$link"
else
PRG=`dirname "$PRG"`"/$link"
fi
done
SAVED="`pwd`"
cd "`dirname \"$PRG\"`/" >/dev/null
APP_HOME="`pwd -P`"
cd "$SAVED" >/dev/null
APP_NAME="Gradle"
APP_BASE_NAME=`basename "$0"`
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"
# Print all arguments, joined into a single line, to standard output.
warn () {
echo "$*"
}
# Print all arguments as one message surrounded by blank lines,
# then terminate the script with exit status 1.
die () {
echo
echo "$*"
echo
exit 1
}
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "`uname`" in
CYGWIN* )
cygwin=true
;;
Darwin* )
darwin=true
;;
MINGW* )
msys=true
;;
NONSTOP* )
nonstop=true
;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD="$JAVA_HOME/jre/sh/java"
else
JAVACMD="$JAVA_HOME/bin/java"
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD="java"
which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
# Increase the maximum file descriptors if we can.
if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
MAX_FD_LIMIT=`ulimit -H -n`
if [ $? -eq 0 ] ; then
if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
MAX_FD="$MAX_FD_LIMIT"
fi
ulimit -n $MAX_FD
if [ $? -ne 0 ] ; then
warn "Could not set maximum file descriptor limit: $MAX_FD"
fi
else
warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
fi
fi
# For Darwin, add options to specify how the application appears in the dock
if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi
# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
JAVACMD=`cygpath --unix "$JAVACMD"`
# We build the pattern for arguments to be converted via cygpath
ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
SEP=""
for dir in $ROOTDIRSRAW ; do
ROOTDIRS="$ROOTDIRS$SEP$dir"
SEP="|"
done
OURCYGPATTERN="(^($ROOTDIRS))"
# Add a user-defined pattern to the cygpath arguments
if [ "$GRADLE_CYGPATTERN" != "" ] ; then
OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
fi
# Now convert the arguments - kludge to limit ourselves to /bin/sh
i=0
for arg in "$@" ; do
CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
else
eval `echo args$i`="\"$arg\""
fi
i=`expr $i + 1`
done
case $i in
0) set -- ;;
1) set -- "$args0" ;;
2) set -- "$args0" "$args1" ;;
3) set -- "$args0" "$args1" "$args2" ;;
4) set -- "$args0" "$args1" "$args2" "$args3" ;;
5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi
# Escape application args
# Print each argument wrapped in single quotes, with embedded single quotes
# escaped and a trailing backslash after the closing quote, followed by a
# final line holding a single space. The output is later passed through eval.
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=`save "$@"`
# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
exec "$JAVACMD" "$@"

89
gradlew.bat vendored Normal file
View File

@ -0,0 +1,89 @@
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@if "%DEBUG%" == "" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto execute
echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto execute
echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
echo.
echo Please set the JAVA_HOME variable in your environment to match the
echo location of your Java installation.
goto fail
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
:end
@rem End local scope for the variables with windows NT shell
if "%ERRORLEVEL%"=="0" goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1
exit /b 1
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

3
settings.gradle Normal file
View File

@ -0,0 +1,3 @@
// Name of the root project.
rootProject.name = 'pdf-parsers'
// Subprojects that are part of this build.
include 'caruna-invoice'