diff --git a/README.md b/README.md index 7e6b0b4..dd60e8f 100644 --- a/README.md +++ b/README.md @@ -5,4 +5,13 @@ This project contains different PDF parsers for my personal use. Feel free to co ### Caruna Invoice Parser PDF parser to parse Caruna invoices. -Usage: ``./gradlew :caruna-invoice:run --args="/path/to/pdf"`` \ No newline at end of file +Usage: ``./gradlew :caruna-invoice:run --args="/path/to/pdf"`` + + +### Helen Invoice Parser +PDF parser to parse Helen invoices. + +Usage: ``./gradlew :helen-invoice:run --args="/path/to/pdf"`` + + + diff --git a/caruna-invoice/build/classes/java/main/com/devsoap/parsers/caruna/Parser.class b/caruna-invoice/build/classes/java/main/com/devsoap/parsers/caruna/Parser.class index ab92d8d..3d50a8a 100644 Binary files a/caruna-invoice/build/classes/java/main/com/devsoap/parsers/caruna/Parser.class and b/caruna-invoice/build/classes/java/main/com/devsoap/parsers/caruna/Parser.class differ diff --git a/caruna-invoice/src/main/java/com/devsoap/parsers/caruna/Parser.java b/caruna-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java similarity index 96% rename from caruna-invoice/src/main/java/com/devsoap/parsers/caruna/Parser.java rename to caruna-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java index 285b003..1ad5363 100644 --- a/caruna-invoice/src/main/java/com/devsoap/parsers/caruna/Parser.java +++ b/caruna-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java @@ -1,25 +1,19 @@ -package com.devsoap.parsers.caruna; +package com.devsoap.parsers.helen; import com.itextpdf.kernel.pdf.PdfDocument; -import com.itextpdf.kernel.pdf.PdfPage; import com.itextpdf.kernel.pdf.PdfReader; import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor; import java.io.FileNotFoundException; import java.io.IOException; -import java.nio.file.Path; import java.nio.file.Paths; -import java.text.DateFormat; -import java.text.ParseException; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.time.format.FormatStyle; import java.time.format.TextStyle; -import java.util.Date; import java.util.Locale; import java.util.Objects; import java.util.Scanner; -import java.util.regex.Matcher; import java.util.regex.Pattern; public class Parser { diff --git a/helen-invoice/build.gradle b/helen-invoice/build.gradle new file mode 100644 index 0000000..f1d9f1e --- /dev/null +++ b/helen-invoice/build.gradle @@ -0,0 +1 @@ +application.mainClass='com.devsoap.parsers.helen.Parser' diff --git a/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser$Period.class b/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser$Period.class new file mode 100644 index 0000000..7a6c516 Binary files /dev/null and b/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser$Period.class differ diff --git a/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser.class b/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser.class new file mode 100644 index 0000000..d059f42 Binary files /dev/null and b/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser.class differ diff --git a/helen-invoice/build/tmp/compileJava/source-classes-mapping.txt b/helen-invoice/build/tmp/compileJava/source-classes-mapping.txt new file mode 100644 index 0000000..7b6ed76 --- /dev/null +++ b/helen-invoice/build/tmp/compileJava/source-classes-mapping.txt @@ -0,0 +1,3 @@ +com/devsoap/parsers/helen/Parser.java + com.devsoap.parsers.helen.Parser + com.devsoap.parsers.helen.Parser$Period diff --git a/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java b/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java new file mode 100644 index 0000000..d7726d9 --- /dev/null +++ b/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java @@ -0,0 +1,126 @@ +package com.devsoap.parsers.helen; + +import com.itextpdf.kernel.pdf.PdfDocument; +import com.itextpdf.kernel.pdf.PdfReader; +import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.Paths; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.time.format.FormatStyle; +import java.time.format.TextStyle; +import java.util.HashMap; +import java.util.Locale; +import java.util.Objects; +import java.util.Scanner; +import java.util.function.Function; +import java.util.regex.Pattern; + +public class Parser { + + private static final Pattern PERUSMAKSU_PATTERN = Pattern.compile( + "perusmaksu (\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d)-(\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d).* (\\d*,\\d\\d) e"); + private static final Pattern ENERGIA_PATTERN = Pattern.compile( + "energia (\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d)-(\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d) ([0-9 ]*) kWh (\\d+,\\d\\d) c"); + + + private static final Locale FI_LOCALE = new Locale("FI", "fi"); + private static final DateTimeFormatter FI_DATE = DateTimeFormatter + .ofLocalizedDate(FormatStyle.SHORT) + .withLocale(FI_LOCALE); + + + + static class Period { + double basicPay = 0.0; + int dayEnergy = 0; + int nightEnergy = 0; + double dayEnergyEur = 0.0; + double nightEnergyEur = 0.0; + + @Override + public String toString() { + return "Period{" + + "basicPay=" + basicPay + + ", dayEnergy=" + dayEnergy + + ", nightEnergy=" + nightEnergy + + ", dayEnergyEur=" + dayEnergyEur + + ", nightEnergyEur=" + nightEnergyEur + + '}'; + } + } + + public static void main(String[] args) { + var filename = args[0]; + var daySiirtoKwh = 937; + var nightSiirtoKwh = 920; + + var file = Paths.get(filename); + try(var reader = new PdfReader(file.toFile())) { + var document = new PdfDocument(reader); + var page2 = document.getPage(2); + var text = PdfTextExtractor.getTextFromPage(page2); + + var scanner = new Scanner(text); + + System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " + + "(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" + + ",Yösiirto (EUR),Vero"); + + var periods = new HashMap(); + + while(scanner.hasNextLine()) { + var line = scanner.nextLine(); + if(PERUSMAKSU_PATTERN.asPredicate().test(line)) { + var matcher = PERUSMAKSU_PATTERN.matcher(line); + while (matcher.find()) { + var month = LocalDate.from( FI_DATE.parse(matcher.group(1))) + .getMonth().getDisplayName(TextStyle.FULL, new Locale("FI","fi")); + month = month.substring(0,1).toUpperCase() + month.substring(1, month.length()-2); + var basicPay = Double.parseDouble(matcher.group(3).replace(",", ".")); + + periods.computeIfAbsent(month, s -> new Period()); + periods.computeIfPresent(month, (s,p) -> { + p.basicPay =basicPay; + return p; + }); + } + } else if(ENERGIA_PATTERN.asPredicate().test(line)) { + var matcher = ENERGIA_PATTERN.matcher(line); + while (matcher.find()) { + var month = LocalDate.from( FI_DATE.parse(matcher.group(1))) + .getMonth().getDisplayName(TextStyle.FULL, new Locale("FI","fi")); + month = month.substring(0,1).toUpperCase() + month.substring(1, month.length()-2); + + var totalEnergy = Integer.parseInt(matcher.group(3) + .replace(" ", "")); + var eurPerKwh = Double.parseDouble(matcher.group(4) + .replace(",", ".")) / 100.0; + + periods.computeIfAbsent(month, s -> new Period()); + periods.computeIfPresent(month, (s,p) -> { + p.dayEnergy = totalEnergy - nightSiirtoKwh; + p.dayEnergyEur = p.dayEnergy * eurPerKwh; + p.nightEnergy = totalEnergy - daySiirtoKwh; + p.nightEnergyEur = p.nightEnergy * eurPerKwh; + return p; + }); + } + } + } + + periods.forEach((month,period ) -> { + var csv = String.format("%s,%.02f,,%d,%.02f,%d,%.02f,,,,", month, period.basicPay, + period.dayEnergy, period.dayEnergyEur,period.nightEnergy, period.nightEnergyEur); + System.out.println(csv); + }); + + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } +} diff --git a/settings.gradle b/settings.gradle index b9d1d89..fba2639 100644 --- a/settings.gradle +++ b/settings.gradle @@ -1,3 +1,4 @@ rootProject.name = 'pdf-parsers' include 'caruna-invoice' +include 'helen-invoice'