Add Caruna/Helen composite parser
This commit is contained in:
parent
9e883631ec
commit
8cd1376240
|
@ -1,2 +1,3 @@
|
|||
.gradle
|
||||
.idea
|
||||
build
|
||||
|
|
11
README.md
11
README.md
|
@ -7,11 +7,18 @@ PDF parser to parse Caruna invoices.
|
|||
|
||||
Usage: ``./gradlew :caruna-invoice:run --args="/path/to/pdf"``
|
||||
|
||||
|
||||
### Helen Invoice Parser
|
||||
PDF parser to parse Helen invoices.
|
||||
|
||||
Usage: ``./gradlew :helen-invoice:run --args="/path/to/pdf"``
|
||||
Usage: ``./gradlew :helen-invoice:run --args="/path/to/pdf <month>:dayKwh <month>:nightKwh"``
|
||||
|
||||
|
||||
### Helen/Caruna Composite Parser
|
||||
PDF parser that combines Helen and Caruna invoices into a single CSV line per month.
|
||||
|
||||
Usage: ``./gradlew :composite-parsers:run --args="<caruna-invoice>.pdf <helen-invoice>.pdf"``
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
Binary file not shown.
|
@ -1,2 +0,0 @@
|
|||
com/devsoap/parsers/caruna/Parser.java
|
||||
com.devsoap.parsers.caruna.Parser
|
|
@ -1,19 +1,17 @@
|
|||
package com.devsoap.parsers.helen;
|
||||
package com.devsoap.parsers.caruna;
|
||||
|
||||
import com.itextpdf.kernel.pdf.PdfDocument;
|
||||
import com.itextpdf.kernel.pdf.PdfReader;
|
||||
import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.FormatStyle;
|
||||
import java.time.format.TextStyle;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.Scanner;
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class Parser {
|
||||
|
@ -29,78 +27,81 @@ public class Parser {
|
|||
.ofLocalizedDate(FormatStyle.SHORT)
|
||||
.withLocale(FI_LOCALE);
|
||||
|
||||
public static class Period {
|
||||
public double basicPay = 0.0;
|
||||
public double transferDayPrice = 0.0;
|
||||
public double transferDayTotal = 0.0;
|
||||
public int transferDayKwh = 0;
|
||||
public double transferNightPrice = 0.0;
|
||||
public double transferNightTotal = 0.0;
|
||||
public int transferNightKwh = 0;
|
||||
public double tax = 0.0;
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
var filename = args[0];
|
||||
var file = Paths.get(filename);
|
||||
|
||||
System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " +
|
||||
"(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" +
|
||||
",Yösiirto (EUR),Vero");
|
||||
|
||||
parse(file).forEach((n,p) -> {
|
||||
var csv = String.format("%s,,%.02f,,,,,%d, %.02f,%d, %.02f, %.02f",
|
||||
n, p.basicPay, p.transferDayKwh, p.transferDayTotal, p.transferNightKwh, p.transferNightTotal, p.tax);
|
||||
System.out.println(csv);
|
||||
});
|
||||
}
|
||||
|
||||
public static Map<String, Period> parse(Path file) {
|
||||
try(var reader = new PdfReader(file.toFile())) {
|
||||
var document = new PdfDocument(reader);
|
||||
var page2 = document.getPage(2);
|
||||
var text = PdfTextExtractor.getTextFromPage(page2);
|
||||
|
||||
var scanner = new Scanner(text);
|
||||
var month = "";
|
||||
var basicPay = 0.0;
|
||||
var transferDayPrice = 0.0;
|
||||
var transferDayTotal = 0.0;
|
||||
var transforDayKwh = 0L;
|
||||
var transferNightPrice = 0.0;
|
||||
var transferNightTotal = 0.0;
|
||||
var transforNightKwh = 0L;
|
||||
var tax = 0.0;
|
||||
|
||||
System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " +
|
||||
"(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" +
|
||||
",Yösiirto (EUR),Vero");
|
||||
var periods = new HashMap<String, Period>();
|
||||
|
||||
Period currentPeriod = null;
|
||||
while(scanner.hasNextLine()) {
|
||||
var line = scanner.nextLine();
|
||||
if(DATE_RANGE_PATTERN.asPredicate().test(line)) {
|
||||
if(!Objects.equals(month, "")) {
|
||||
var csv = String.format("%s,,%.02f,,,,,%d, %.02f,%d, %.02f, %.02f",
|
||||
month, basicPay, transforDayKwh, transferDayTotal, transforNightKwh, transferNightTotal, tax);
|
||||
System.out.println(csv);
|
||||
}
|
||||
|
||||
var matcher = DATE_RANGE_PATTERN.matcher(line);
|
||||
while(matcher.find()) {
|
||||
month = LocalDate.from( FI_DATE.parse(matcher.group(1)))
|
||||
var month = LocalDate.from( FI_DATE.parse(matcher.group(1)))
|
||||
.getMonth()
|
||||
.getDisplayName(TextStyle.FULL, new Locale("FI","fi"));
|
||||
month = month.substring(0,1).toUpperCase() + month.substring(1, month.length()-2);
|
||||
currentPeriod = periods.computeIfAbsent(month, s -> new Period());
|
||||
}
|
||||
} else if(PERUSMAKSU_PATTERN.asPredicate().test(line)) {
|
||||
var matcher = PERUSMAKSU_PATTERN.matcher(line);
|
||||
while (matcher.find()) {
|
||||
basicPay = Double.parseDouble(matcher.group(1).replace(",", "."));
|
||||
currentPeriod.basicPay = Double.parseDouble(matcher.group(1).replace(",", "."));
|
||||
}
|
||||
} else if(P_SIIRTO_PATTERN.asPredicate().test(line)) {
|
||||
var matcher = P_SIIRTO_PATTERN.matcher(line);
|
||||
while (matcher.find()) {
|
||||
transferDayPrice = Double.parseDouble(matcher.group(1).replace(",", ".")) / 100.0;
|
||||
transferDayTotal = Double.parseDouble(matcher.group(2).replace(",", "."));
|
||||
transforDayKwh = Math.round(transferDayTotal / transferDayPrice);
|
||||
currentPeriod.transferDayPrice = Double.parseDouble(matcher.group(1).replace(",", ".")) / 100.0;
|
||||
currentPeriod.transferDayTotal = Double.parseDouble(matcher.group(2).replace(",", "."));
|
||||
currentPeriod.transferDayKwh = (int) Math.round(currentPeriod.transferDayTotal / currentPeriod.transferDayPrice);
|
||||
}
|
||||
} else if(O_SIIRTO_PATTERN.asPredicate().test(line)) {
|
||||
var matcher = O_SIIRTO_PATTERN.matcher(line);
|
||||
while (matcher.find()) {
|
||||
transferNightPrice = Double.parseDouble(matcher.group(1).replace(",", ".")) / 100.0;
|
||||
transferNightTotal = Double.parseDouble(matcher.group(2).replace(",", "."));
|
||||
transforNightKwh = Math.round(transferNightTotal / transferNightPrice);
|
||||
currentPeriod.transferNightPrice = Double.parseDouble(matcher.group(1).replace(",", ".")) / 100.0;
|
||||
currentPeriod.transferNightTotal = Double.parseDouble(matcher.group(2).replace(",", "."));
|
||||
currentPeriod.transferNightKwh = (int) Math.round(currentPeriod.transferNightTotal / currentPeriod.transferNightPrice);
|
||||
}
|
||||
} else if(TAX_PATTERN.asPredicate().test(line)) {
|
||||
var matcher = TAX_PATTERN.matcher(line);
|
||||
while (matcher.find()) {
|
||||
tax = Double.parseDouble(matcher.group(1).replace(",", "."));
|
||||
currentPeriod.tax = Double.parseDouble(matcher.group(1).replace(",", "."));
|
||||
}
|
||||
}
|
||||
}
|
||||
var csv = String.format("%s,,%.02f,,,,,%d, %.02f,%d, %.02f, %.02f",
|
||||
month, basicPay, transforDayKwh, transferDayTotal, transforNightKwh, transferNightTotal, tax);
|
||||
System.out.println(csv);
|
||||
} catch (FileNotFoundException e) {
|
||||
e.printStackTrace();
|
||||
return periods;
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,6 @@
|
|||
// The composite parser builds on both single-invoice parsers.
dependencies {
    implementation(project(':caruna-invoice'))
    implementation(project(':helen-invoice'))
}

// Entry point used by the `run` task.
application {
    mainClass = 'com.devsoap.parsers.composites.CarunaHelenParser'
}
|
|
@ -0,0 +1,40 @@
|
|||
package com.devsoap.parsers.composites;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class CarunaHelenParser {
|
||||
|
||||
public static void main(String[] args) {
|
||||
var carunaFile = Path.of("/home/john/Downloads/caruna-lasku.pdf");
|
||||
var helenFile = Path.of("/home/john/Downloads/helen-lasku.pdf");
|
||||
|
||||
var carunaPeriods = com.devsoap.parsers.caruna.Parser.parse(carunaFile);
|
||||
var nightSiirto = carunaPeriods.entrySet().stream()
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().transferNightKwh));
|
||||
var daySiirto = carunaPeriods.entrySet().stream()
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().transferDayKwh));
|
||||
var helenPeriods = com.devsoap.parsers.helen.Parser.parse(helenFile, daySiirto, nightSiirto);
|
||||
|
||||
System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " +
|
||||
"(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" +
|
||||
",Yösiirto (EUR),Vero");
|
||||
|
||||
var months = new HashSet<>(carunaPeriods.keySet());
|
||||
months.addAll(helenPeriods.keySet());
|
||||
months.forEach(month -> {
|
||||
var hp = helenPeriods.getOrDefault(month, new com.devsoap.parsers.helen.Parser.Period());
|
||||
var cp = carunaPeriods.getOrDefault(month, new com.devsoap.parsers.caruna.Parser.Period());
|
||||
|
||||
var csv = String.format("%s,%.02f,%.02f,%d,%.02f,%d,%.02f,%d,%.02f,%d,%.02f,%.02f",
|
||||
month, hp.basicPay, cp.basicPay, hp.dayEnergy, hp.dayEnergyEur, hp.nightEnergy,
|
||||
hp.nightEnergyEur, cp.transferDayKwh, cp.transferDayTotal, cp.transferNightKwh,
|
||||
cp.transferNightTotal, cp.tax);
|
||||
csv = csv.replace(",0,",",,").replace(",0.00",",");
|
||||
System.out.println(csv);
|
||||
});
|
||||
}
|
||||
}
|
Binary file not shown.
Binary file not shown.
|
@ -1,3 +0,0 @@
|
|||
com/devsoap/parsers/helen/Parser.java
|
||||
com.devsoap.parsers.helen.Parser
|
||||
com.devsoap.parsers.helen.Parser$Period
|
|
@ -6,17 +6,15 @@ import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor;
|
|||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.FormatStyle;
|
||||
import java.time.format.TextStyle;
|
||||
import java.util.HashMap;
|
||||
import java.util.Locale;
|
||||
import java.util.Objects;
|
||||
import java.util.Scanner;
|
||||
import java.util.function.Function;
|
||||
import java.util.*;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class Parser {
|
||||
|
||||
|
@ -31,46 +29,44 @@ public class Parser {
|
|||
.ofLocalizedDate(FormatStyle.SHORT)
|
||||
.withLocale(FI_LOCALE);
|
||||
|
||||
|
||||
|
||||
static class Period {
|
||||
double basicPay = 0.0;
|
||||
int dayEnergy = 0;
|
||||
int nightEnergy = 0;
|
||||
double dayEnergyEur = 0.0;
|
||||
double nightEnergyEur = 0.0;
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Period{" +
|
||||
"basicPay=" + basicPay +
|
||||
", dayEnergy=" + dayEnergy +
|
||||
", nightEnergy=" + nightEnergy +
|
||||
", dayEnergyEur=" + dayEnergyEur +
|
||||
", nightEnergyEur=" + nightEnergyEur +
|
||||
'}';
|
||||
}
|
||||
public static class Period {
|
||||
public double basicPay = 0.0;
|
||||
public int dayEnergy = 0;
|
||||
public int nightEnergy = 0;
|
||||
public double dayEnergyEur = 0.0;
|
||||
public double nightEnergyEur = 0.0;
|
||||
}
|
||||
|
||||
public static void main(String[] args) {
|
||||
var filename = args[0];
|
||||
var daySiirtoKwh = 937;
|
||||
var nightSiirtoKwh = 920;
|
||||
var filename = Path.of(args[0]);
|
||||
var daySiirtoKwh = Arrays
|
||||
.stream(args[1].split(","))
|
||||
.map(period -> period.split(":"))
|
||||
.collect(Collectors.toMap(values -> values[0], values -> Integer.parseInt(values[1])));
|
||||
var nightSiirtoKwh = Arrays
|
||||
.stream(args[2].split(","))
|
||||
.map(period -> period.split(":"))
|
||||
.collect(Collectors.toMap(values -> values[0], values -> Integer.parseInt(values[1])));
|
||||
|
||||
var file = Paths.get(filename);
|
||||
System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " +
|
||||
"(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" +
|
||||
",Yösiirto (EUR),Vero");
|
||||
|
||||
parse(filename,daySiirtoKwh, nightSiirtoKwh).forEach((month,period ) -> {
|
||||
var csv = String.format("%s,%.02f,,%d,%.02f,%d,%.02f,,,,", month,
|
||||
period.dayEnergy, period.dayEnergyEur,period.nightEnergy, period.nightEnergyEur);
|
||||
System.out.println(csv);
|
||||
});
|
||||
}
|
||||
|
||||
public static Map<String, Period> parse(Path file, Map<String, Integer> daySiirtoKwh,
|
||||
Map<String, Integer> nightSiirtoKwh) {
|
||||
try(var reader = new PdfReader(file.toFile())) {
|
||||
var document = new PdfDocument(reader);
|
||||
var page2 = document.getPage(2);
|
||||
var text = PdfTextExtractor.getTextFromPage(page2);
|
||||
|
||||
var scanner = new Scanner(text);
|
||||
|
||||
System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " +
|
||||
"(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" +
|
||||
",Yösiirto (EUR),Vero");
|
||||
|
||||
var periods = new HashMap<String, Period>();
|
||||
|
||||
while(scanner.hasNextLine()) {
|
||||
var line = scanner.nextLine();
|
||||
if(PERUSMAKSU_PATTERN.asPredicate().test(line)) {
|
||||
|
@ -99,28 +95,22 @@ public class Parser {
|
|||
var eurPerKwh = Double.parseDouble(matcher.group(4)
|
||||
.replace(",", ".")) / 100.0;
|
||||
|
||||
var nightSiirto = nightSiirtoKwh.getOrDefault(month,0);
|
||||
var daySriirto = daySiirtoKwh.getOrDefault(month,0);
|
||||
periods.computeIfAbsent(month, s -> new Period());
|
||||
periods.computeIfPresent(month, (s,p) -> {
|
||||
p.dayEnergy = totalEnergy - nightSiirtoKwh;
|
||||
p.dayEnergy = totalEnergy - nightSiirto;
|
||||
p.dayEnergyEur = p.dayEnergy * eurPerKwh;
|
||||
p.nightEnergy = totalEnergy - daySiirtoKwh;
|
||||
p.nightEnergy = totalEnergy - daySriirto;
|
||||
p.nightEnergyEur = p.nightEnergy * eurPerKwh;
|
||||
return p;
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
periods.forEach((month,period ) -> {
|
||||
var csv = String.format("%s,%.02f,,%d,%.02f,%d,%.02f,,,,", month, period.basicPay,
|
||||
period.dayEnergy, period.dayEnergyEur,period.nightEnergy, period.nightEnergyEur);
|
||||
System.out.println(csv);
|
||||
});
|
||||
|
||||
} catch (FileNotFoundException e) {
|
||||
e.printStackTrace();
|
||||
return periods;
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
// Root build: the two invoice parsers plus the composite parser that combines them.
rootProject.name = 'pdf-parsers'
include 'caruna-invoice', 'helen-invoice', 'composite-parsers'
|
||||
|
||||
|
|
Loading…
Reference in New Issue