From 6a7c3f381598552e0b9fd1d6646ef97eb95473b3 Mon Sep 17 00:00:00 2001 From: John Ahlroos Date: Fri, 28 May 2021 15:02:21 +0300 Subject: [PATCH] Add plugsurfing invoice parser --- README.md | 5 +- .../parsers/composites/CarunaHelenParser.java | 1 - .../com/devsoap/parsers/helen/Parser.java | 11 -- plugsurfing-invoice/build.gradle | 1 + .../devsoap/parsers/plugsurfing/Parser.java | 101 ++++++++++++++++++ settings.gradle | 1 + 6 files changed, 107 insertions(+), 13 deletions(-) create mode 100644 plugsurfing-invoice/build.gradle create mode 100644 plugsurfing-invoice/src/main/java/com/devsoap/parsers/plugsurfing/Parser.java diff --git a/README.md b/README.md index 0bb4011..863fe58 100644 --- a/README.md +++ b/README.md @@ -12,12 +12,15 @@ PDF parser to parse Helen invoices. Usage: ``./gradlew :helen-invoice:run --args="/path/to/pdf :dayKwh :nightKwh"`` - ### Helen/Caruna Composite Parser PDF parser to combine Helen/Caruna invoices into single lines Usage: ``./gradlew :composite-parsers:run --args=".pdf .pdf""`` +### Plugsurfing Invoice Parser +PDF parser to parse Plugsurfing electrical monthly bills + +Usage: ``./gradlew :plugsurfing-invoice:run --args="/path/to/pdf"`` diff --git a/composite-parsers/src/main/java/com/devsoap/parsers/composites/CarunaHelenParser.java b/composite-parsers/src/main/java/com/devsoap/parsers/composites/CarunaHelenParser.java index caeb09c..718550a 100644 --- a/composite-parsers/src/main/java/com/devsoap/parsers/composites/CarunaHelenParser.java +++ b/composite-parsers/src/main/java/com/devsoap/parsers/composites/CarunaHelenParser.java @@ -1,7 +1,6 @@ package com.devsoap.parsers.composites; import java.nio.file.Path; -import java.util.ArrayList; import java.util.HashSet; import java.util.Map; import java.util.stream.Collectors; diff --git a/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java b/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java index 8e192b2..370c885 100644 --- a/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java +++ b/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java @@ -35,17 +35,6 @@ public class Parser { public int nightEnergy = 0; public double dayEnergyEur = 0.0; public double nightEnergyEur = 0.0; - - @Override - public String toString() { - return "Period{" + - "basicPay=" + basicPay + - ", dayEnergy=" + dayEnergy + - ", nightEnergy=" + nightEnergy + - ", dayEnergyEur=" + dayEnergyEur + - ", nightEnergyEur=" + nightEnergyEur + - '}'; - } } public static void main(String[] args) { diff --git a/plugsurfing-invoice/build.gradle b/plugsurfing-invoice/build.gradle new file mode 100644 index 0000000..79b012c --- /dev/null +++ b/plugsurfing-invoice/build.gradle @@ -0,0 +1 @@ +application.mainClass='com.devsoap.parsers.plugsurfing.Parser' \ No newline at end of file diff --git a/plugsurfing-invoice/src/main/java/com/devsoap/parsers/plugsurfing/Parser.java b/plugsurfing-invoice/src/main/java/com/devsoap/parsers/plugsurfing/Parser.java new file mode 100644 index 0000000..444649c --- /dev/null +++ b/plugsurfing-invoice/src/main/java/com/devsoap/parsers/plugsurfing/Parser.java @@ -0,0 +1,101 @@ +package com.devsoap.parsers.plugsurfing; + +import com.itextpdf.kernel.pdf.PdfDocument; +import com.itextpdf.kernel.pdf.PdfReader; +import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor; + +import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.text.NumberFormat; +import java.text.ParseException; +import java.time.Duration; +import java.time.LocalDateTime; +import java.time.LocalTime; +import java.time.format.DateTimeFormatter; +import java.util.*; +import java.util.regex.Pattern; + +public class Parser { + + private static final Pattern DATE_TIME_KWH_DURATION_PATTERN = Pattern.compile("(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}) \\((.*), (.*)kWh\\)"); + private static final Pattern QUANTITY_PATTERN = Pattern.compile("(\\d+) (\\d+,\\d+) (\\d+)% (\\d+,\\d+) €"); + + private static final Locale FI_LOCALE = new Locale("FI", "fi"); + private static final DateTimeFormatter FI_DATE = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm").withLocale(FI_LOCALE); + private static final DateTimeFormatter FI_TIME = DateTimeFormatter.ofPattern("HH:mm:ss").withLocale(FI_LOCALE); + private static final DateTimeFormatter KEY_FORMATTER = DateTimeFormatter.ofPattern("yyyy-M"); + private static final NumberFormat GERMANY_AMOUNT = NumberFormat.getNumberInstance(Locale.FRANCE); + + public static void main(String[] args) { + var filename = args[0]; + var file = Paths.get(filename); + System.out.println("Vuosi,Kuukausi,Latauksia,Perusmaksu(€),Total(€),Energia(kwh)"); + parse(file).forEach((month, sessions) -> System.out.println(sessions.stream() + .reduce(Session::add) + .map(session -> String.format("%s,%s,%d,%.02f,%.02f,%.02f", + month.split("-")[0], month.split("-")[1], + session.quantity, session.unitPrice, session.amountEur, session.kwh)) + .orElseThrow())); + } + + public static class Session { + public LocalDateTime timestamp; + public Duration duration; + public int quantity = 0; + public double unitPrice = 0; + public double taxRate = 0; + public double amountEur = 0; + public double kwh; + public Session add(Session session) { + this.duration = this.duration.plus(session.duration); + this.quantity += session.quantity; + this.unitPrice += this.unitPrice; + this.taxRate = Math.max(this.taxRate, session.taxRate); + this.amountEur += session.amountEur; + this.kwh += session.kwh; + return this; + } + } + + public static Map> parse(Path file) { + var sessions = new HashMap>(); + try (var reader = new PdfReader(file.toFile())) { + var document = new PdfDocument(reader); + Session session = null; + for (var pageIndex=1; pageIndex <= document.getNumberOfPages(); pageIndex++) { + var page = document.getPage(pageIndex); + var text = PdfTextExtractor.getTextFromPage(page); + var scanner = new Scanner(text); + while(scanner.hasNextLine()) { + var line = scanner.nextLine(); + if (DATE_TIME_KWH_DURATION_PATTERN.asPredicate().test(line)) { + var matcher = DATE_TIME_KWH_DURATION_PATTERN.matcher(line); + session = new Session(); + while(matcher.find()) { + session.timestamp = LocalDateTime.parse(matcher.group(1), FI_DATE).withSecond(0); + session.duration = Duration.between(LocalTime.of(0,0,0), LocalTime.from(FI_TIME.parse(matcher.group(2)))); + session.kwh = Double.parseDouble(matcher.group(3)); + } + } else if (session != null && QUANTITY_PATTERN.asPredicate().test(line)) { + var matcher = QUANTITY_PATTERN.matcher(line); + while(matcher.find()) { + session.quantity = Integer.parseInt(matcher.group(1)); + session.unitPrice = GERMANY_AMOUNT.parse(matcher.group(2)).doubleValue(); + session.taxRate = Double.parseDouble(matcher.group(3)) / 100.0; + session.amountEur = GERMANY_AMOUNT.parse(matcher.group(4)).doubleValue(); + sessions.computeIfAbsent( + KEY_FORMATTER.format(session.timestamp), + (k) -> new ArrayList<>()) + .add(session); + } + session = null; + } + } + } + } catch (IOException | ParseException e) { + throw new RuntimeException(e); + } + return sessions; + } +} diff --git a/settings.gradle b/settings.gradle index ea42789..c2c8030 100644 --- a/settings.gradle +++ b/settings.gradle @@ -2,4 +2,5 @@ rootProject.name = 'pdf-parsers' include 'caruna-invoice' include 'helen-invoice' include 'composite-parsers' +include 'plugsurfing-invoice'