diff --git a/.gitignore b/.gitignore index bffa164..e0d53d8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .gradle .idea +build diff --git a/README.md b/README.md index dd60e8f..9160e14 100644 --- a/README.md +++ b/README.md @@ -7,11 +7,18 @@ PDF parser to parse Caruna invoices. Usage: ``./gradlew :caruna-invoice:run --args="/path/to/pdf"`` - ### Helen Invoice Parser PDF parser to parse Helen invoices. -Usage: ``./gradlew :helen-invoice:run --args="/path/to/pdf"`` +Usage: ``./gradlew :helen-invoice:run --args="/path/to/pdf month:dayKwh month:nightKwh"`` + + +### Helen/Caruna Composite Parser +PDF parser to combine Helen/Caruna invoices into single lines + +Usage: ``./gradlew :composite-parsers:run --args="/path/to/caruna.pdf /path/to/helen.pdf"`` + + diff --git a/caruna-invoice/build/classes/java/main/com/devsoap/parsers/caruna/Parser.class b/caruna-invoice/build/classes/java/main/com/devsoap/parsers/caruna/Parser.class deleted file mode 100644 index 3d50a8a..0000000 Binary files a/caruna-invoice/build/classes/java/main/com/devsoap/parsers/caruna/Parser.class and /dev/null differ diff --git a/caruna-invoice/build/tmp/compileJava/source-classes-mapping.txt b/caruna-invoice/build/tmp/compileJava/source-classes-mapping.txt deleted file mode 100644 index 9ed8ac7..0000000 --- a/caruna-invoice/build/tmp/compileJava/source-classes-mapping.txt +++ /dev/null @@ -1,2 +0,0 @@ -com/devsoap/parsers/caruna/Parser.java - com.devsoap.parsers.caruna.Parser diff --git a/caruna-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java b/caruna-invoice/src/main/java/com/devsoap/parsers/caruna/Parser.java similarity index 58% rename from caruna-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java rename to caruna-invoice/src/main/java/com/devsoap/parsers/caruna/Parser.java index 1ad5363..747077f 100644 --- a/caruna-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java +++ b/caruna-invoice/src/main/java/com/devsoap/parsers/caruna/Parser.java @@ -1,19 +1,17 @@ -package 
com.devsoap.parsers.helen; +package com.devsoap.parsers.caruna; import com.itextpdf.kernel.pdf.PdfDocument; import com.itextpdf.kernel.pdf.PdfReader; import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor; -import java.io.FileNotFoundException; import java.io.IOException; +import java.nio.file.Path; import java.nio.file.Paths; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.time.format.FormatStyle; import java.time.format.TextStyle; -import java.util.Locale; -import java.util.Objects; -import java.util.Scanner; +import java.util.*; import java.util.regex.Pattern; public class Parser { @@ -29,78 +27,85 @@ public class Parser { .ofLocalizedDate(FormatStyle.SHORT) .withLocale(FI_LOCALE); + /** Values parsed for one invoice month. */ + public static class Period { + public double basicPay = 0.0; + public double transferDayPrice = 0.0; + public double transferDayTotal = 0.0; + public int transferDayKwh = 0; + public double transferNightPrice = 0.0; + public double transferNightTotal = 0.0; + public int transferNightKwh = 0; + public double tax = 0.0; + } + + /** Prints a CSV header line followed by one row per period parsed from the PDF given as args[0]. */ public static void main(String[] args) { var filename = args[0]; var file = Paths.get(filename); + + System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " + + "(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" + + ",Yösiirto (EUR),Vero"); + + parse(file).forEach((n,p) -> { + var csv = String.format("%s,,%.02f,,,,,%d, %.02f,%d, %.02f, %.02f", + n, p.basicPay, p.transferDayKwh, p.transferDayTotal, p.transferNightKwh, p.transferNightTotal, p.tax); + System.out.println(csv); + }); + } + + /** Reads page 2 of the invoice PDF and returns the parsed periods keyed by month name; an IOException is rethrown wrapped in a RuntimeException. */ + public static Map<String, Period> parse(Path file) { try(var reader = new PdfReader(file.toFile())) { var document = new PdfDocument(reader); var page2 = document.getPage(2); var text = PdfTextExtractor.getTextFromPage(page2); - var scanner = new Scanner(text); - var month = ""; - var basicPay = 0.0; - var transferDayPrice = 0.0; - var transferDayTotal = 0.0; - var 
transforDayKwh = 0L; - var transferNightPrice = 0.0; - var transferNightTotal = 0.0; - var transforNightKwh = 0L; - var tax = 0.0; - - System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " + - "(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" + - ",Yösiirto (EUR),Vero"); + var periods = new HashMap<String, Period>(); + // NOTE(review): currentPeriod stays null until a date-range line has matched; a charge line matched before that would throw a NullPointerException. + Period currentPeriod = null; while(scanner.hasNextLine()) { var line = scanner.nextLine(); if(DATE_RANGE_PATTERN.asPredicate().test(line)) { - if(!Objects.equals(month, "")) { - var csv = String.format("%s,,%.02f,,,,,%d, %.02f,%d, %.02f, %.02f", - month, basicPay, transforDayKwh, transferDayTotal, transforNightKwh, transferNightTotal, tax); - System.out.println(csv); - } - var matcher = DATE_RANGE_PATTERN.matcher(line); while(matcher.find()) { - month = LocalDate.from( FI_DATE.parse(matcher.group(1))) + var month = LocalDate.from( FI_DATE.parse(matcher.group(1))) .getMonth() .getDisplayName(TextStyle.FULL, new Locale("FI","fi")); month = month.substring(0,1).toUpperCase() + month.substring(1, month.length()-2); + currentPeriod = periods.computeIfAbsent(month, s -> new Period()); } } else if(PERUSMAKSU_PATTERN.asPredicate().test(line)) { var matcher = PERUSMAKSU_PATTERN.matcher(line); while (matcher.find()) { - basicPay = Double.parseDouble(matcher.group(1).replace(",", ".")); + currentPeriod.basicPay = Double.parseDouble(matcher.group(1).replace(",", ".")); } } else if(P_SIIRTO_PATTERN.asPredicate().test(line)) { var matcher = P_SIIRTO_PATTERN.matcher(line); while (matcher.find()) { - transferDayPrice = Double.parseDouble(matcher.group(1).replace(",", ".")) / 100.0; - transferDayTotal = Double.parseDouble(matcher.group(2).replace(",", ".")); - transforDayKwh = Math.round(transferDayTotal / transferDayPrice); + currentPeriod.transferDayPrice = Double.parseDouble(matcher.group(1).replace(",", ".")) / 100.0; + currentPeriod.transferDayTotal = 
Double.parseDouble(matcher.group(2).replace(",", ".")); + currentPeriod.transferDayKwh = (int) Math.round(currentPeriod.transferDayTotal / currentPeriod.transferDayPrice); } } else if(O_SIIRTO_PATTERN.asPredicate().test(line)) { var matcher = O_SIIRTO_PATTERN.matcher(line); while (matcher.find()) { - transferNightPrice = Double.parseDouble(matcher.group(1).replace(",", ".")) / 100.0; - transferNightTotal = Double.parseDouble(matcher.group(2).replace(",", ".")); - transforNightKwh = Math.round(transferNightTotal / transferNightPrice); + currentPeriod.transferNightPrice = Double.parseDouble(matcher.group(1).replace(",", ".")) / 100.0; + currentPeriod.transferNightTotal = Double.parseDouble(matcher.group(2).replace(",", ".")); + currentPeriod.transferNightKwh = (int) Math.round(currentPeriod.transferNightTotal / currentPeriod.transferNightPrice); } } else if(TAX_PATTERN.asPredicate().test(line)) { var matcher = TAX_PATTERN.matcher(line); while (matcher.find()) { - tax = Double.parseDouble(matcher.group(1).replace(",", ".")); + currentPeriod.tax = Double.parseDouble(matcher.group(1).replace(",", ".")); } } } - var csv = String.format("%s,,%.02f,,,,,%d, %.02f,%d, %.02f, %.02f", - month, basicPay, transforDayKwh, transferDayTotal, transforNightKwh, transferNightTotal, tax); - System.out.println(csv); - } catch (FileNotFoundException e) { - e.printStackTrace(); + return periods; } catch (IOException e) { - e.printStackTrace(); + throw new RuntimeException(e); } } } diff --git a/composite-parsers/build.gradle b/composite-parsers/build.gradle new file mode 100644 index 0000000..0ad4b05 --- /dev/null +++ b/composite-parsers/build.gradle @@ -0,0 +1,6 @@ +dependencies { + implementation project(':caruna-invoice') + implementation project(':helen-invoice') +} + +application.mainClass='com.devsoap.parsers.composites.CarunaHelenParser' diff --git a/composite-parsers/src/main/java/com/devsoap/parsers/composites/CarunaHelenParser.java 
b/composite-parsers/src/main/java/com/devsoap/parsers/composites/CarunaHelenParser.java new file mode 100644 index 0000000..0e73d54 --- /dev/null +++ b/composite-parsers/src/main/java/com/devsoap/parsers/composites/CarunaHelenParser.java @@ -0,0 +1,42 @@ +package com.devsoap.parsers.composites; + +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Map; +import java.util.stream.Collectors; + +/** Combines the Caruna and Helen invoice parsers and prints one CSV row per month. */ +public class CarunaHelenParser { + + /** Uses args[0] as the Caruna PDF and args[1] as the Helen PDF; a missing argument falls back to the original hard-coded path. */ + public static void main(String[] args) { + var carunaFile = Path.of(args.length > 0 ? args[0] : "/home/john/Downloads/caruna-lasku.pdf"); + var helenFile = Path.of(args.length > 1 ? args[1] : "/home/john/Downloads/helen-lasku.pdf"); + + var carunaPeriods = com.devsoap.parsers.caruna.Parser.parse(carunaFile); + var nightSiirto = carunaPeriods.entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().transferNightKwh)); + var daySiirto = carunaPeriods.entrySet().stream() + .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().transferDayKwh)); + var helenPeriods = com.devsoap.parsers.helen.Parser.parse(helenFile, daySiirto, nightSiirto); + + System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " + + "(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" + + ",Yösiirto (EUR),Vero"); + + var months = new HashSet<>(carunaPeriods.keySet()); + months.addAll(helenPeriods.keySet()); + months.forEach(month -> { + var hp = helenPeriods.getOrDefault(month, new com.devsoap.parsers.helen.Parser.Period()); + var cp = carunaPeriods.getOrDefault(month, new com.devsoap.parsers.caruna.Parser.Period()); + + var csv = String.format("%s,%.02f,%.02f,%d,%.02f,%d,%.02f,%d,%.02f,%d,%.02f,%.02f", + month, hp.basicPay, cp.basicPay, hp.dayEnergy, hp.dayEnergyEur, hp.nightEnergy, + hp.nightEnergyEur, cp.transferDayKwh, cp.transferDayTotal, cp.transferNightKwh, + cp.transferNightTotal, cp.tax); + csv = csv.replace(",0,",",,").replace(",0.00",","); + 
System.out.println(csv); + }); + } +} \ No newline at end of file diff --git a/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser$Period.class b/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser$Period.class deleted file mode 100644 index 7a6c516..0000000 Binary files a/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser$Period.class and /dev/null differ diff --git a/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser.class b/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser.class deleted file mode 100644 index d059f42..0000000 Binary files a/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser.class and /dev/null differ diff --git a/helen-invoice/build/tmp/compileJava/source-classes-mapping.txt b/helen-invoice/build/tmp/compileJava/source-classes-mapping.txt deleted file mode 100644 index 7b6ed76..0000000 --- a/helen-invoice/build/tmp/compileJava/source-classes-mapping.txt +++ /dev/null @@ -1,3 +0,0 @@ -com/devsoap/parsers/helen/Parser.java - com.devsoap.parsers.helen.Parser - com.devsoap.parsers.helen.Parser$Period diff --git a/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java b/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java index d7726d9..640f216 100644 --- a/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java +++ b/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java @@ -6,17 +6,15 @@ import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor; import java.io.FileNotFoundException; import java.io.IOException; +import java.nio.file.Path; import java.nio.file.Paths; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.time.format.FormatStyle; import java.time.format.TextStyle; -import java.util.HashMap; -import java.util.Locale; -import java.util.Objects; -import java.util.Scanner; -import java.util.function.Function; +import java.util.*; import 
java.util.regex.Pattern; +import java.util.stream.Collectors; public class Parser { @@ -31,46 +29,47 @@ public class Parser { .ofLocalizedDate(FormatStyle.SHORT) .withLocale(FI_LOCALE); - - - static class Period { - double basicPay = 0.0; - int dayEnergy = 0; - int nightEnergy = 0; - double dayEnergyEur = 0.0; - double nightEnergyEur = 0.0; - - @Override - public String toString() { - return "Period{" + - "basicPay=" + basicPay + - ", dayEnergy=" + dayEnergy + - ", nightEnergy=" + nightEnergy + - ", dayEnergyEur=" + dayEnergyEur + - ", nightEnergyEur=" + nightEnergyEur + - '}'; - } + /** Values parsed for one invoice month. */ + public static class Period { + public double basicPay = 0.0; + public int dayEnergy = 0; + public int nightEnergy = 0; + public double dayEnergyEur = 0.0; + public double nightEnergyEur = 0.0; } + /** Prints CSV rows for the PDF given as args[0]; args[1] and args[2] are comma-separated month:kWh pairs for day and night transfer. */ public static void main(String[] args) { - var filename = args[0]; - var daySiirtoKwh = 937; - var nightSiirtoKwh = 920; + var filename = Path.of(args[0]); + var daySiirtoKwh = Arrays + .stream(args[1].split(",")) + .map(period -> period.split(":")) + .collect(Collectors.toMap(values -> values[0], values -> Integer.parseInt(values[1]))); + var nightSiirtoKwh = Arrays + .stream(args[2].split(",")) + .map(period -> period.split(":")) + .collect(Collectors.toMap(values -> values[0], values -> Integer.parseInt(values[1]))); - var file = Paths.get(filename); + System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " + + "(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" + + ",Yösiirto (EUR),Vero"); + + parse(filename,daySiirtoKwh, nightSiirtoKwh).forEach((month,period ) -> { + var csv = String.format("%s,%.02f,,%d,%.02f,%d,%.02f,,,,", month, period.basicPay, + period.dayEnergy, period.dayEnergyEur,period.nightEnergy, period.nightEnergyEur); + System.out.println(csv); + }); + } + + /** Reads page 2 of the invoice PDF and returns the parsed periods keyed by month; the per-month kWh maps default to 0 for a missing month, and an IOException is rethrown wrapped in a RuntimeException. */ + public static Map<String, Period> parse(Path file, Map<String, Integer> daySiirtoKwh, + Map<String, Integer> nightSiirtoKwh) { try(var reader = new PdfReader(file.toFile())) { var document = new 
PdfDocument(reader); var page2 = document.getPage(2); var text = PdfTextExtractor.getTextFromPage(page2); - var scanner = new Scanner(text); - - System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " + - "(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" + - ",Yösiirto (EUR),Vero"); - var periods = new HashMap<String, Period>(); - while(scanner.hasNextLine()) { var line = scanner.nextLine(); if(PERUSMAKSU_PATTERN.asPredicate().test(line)) { @@ -99,28 +98,22 @@ public class Parser { var eurPerKwh = Double.parseDouble(matcher.group(4) .replace(",", ".")) / 100.0; + var nightSiirto = nightSiirtoKwh.getOrDefault(month,0); + var daySiirto = daySiirtoKwh.getOrDefault(month,0); periods.computeIfAbsent(month, s -> new Period()); periods.computeIfPresent(month, (s,p) -> { - p.dayEnergy = totalEnergy - nightSiirtoKwh; + p.dayEnergy = totalEnergy - nightSiirto; p.dayEnergyEur = p.dayEnergy * eurPerKwh; - p.nightEnergy = totalEnergy - daySiirtoKwh; + p.nightEnergy = totalEnergy - daySiirto; p.nightEnergyEur = p.nightEnergy * eurPerKwh; return p; }); } } } - - periods.forEach((month,period ) -> { - var csv = String.format("%s,%.02f,,%d,%.02f,%d,%.02f,,,,", month, period.basicPay, - period.dayEnergy, period.dayEnergyEur,period.nightEnergy, period.nightEnergyEur); - System.out.println(csv); - }); - - } catch (FileNotFoundException e) { - e.printStackTrace(); + return periods; } catch (IOException e) { - e.printStackTrace(); + throw new RuntimeException(e); } } } diff --git a/settings.gradle b/settings.gradle index fba2639..ea42789 100644 --- a/settings.gradle +++ b/settings.gradle @@ -1,4 +1,5 @@ rootProject.name = 'pdf-parsers' include 'caruna-invoice' include 'helen-invoice' +include 'composite-parsers'