1
0
Fork 0

Add plugsurfing invoice parser

This commit is contained in:
John Ahlroos 2021-05-28 15:02:21 +03:00
parent 8d75f2cb48
commit 6a7c3f3815
Signed by: john
GPG Key ID: 258D0F70DB84CD5D
6 changed files with 107 additions and 13 deletions

View File

@ -12,12 +12,15 @@ PDF parser to parse Helen invoices.
Usage: ``./gradlew :helen-invoice:run --args="/path/to/pdf <month>:dayKwh <month>:nightKwh"`` Usage: ``./gradlew :helen-invoice:run --args="/path/to/pdf <month>:dayKwh <month>:nightKwh"``
### Helen/Caruna Composite Parser ### Helen/Caruna Composite Parser
PDF parser to combine Helen/Caruna invoices into single lines PDF parser to combine Helen/Caruna invoices into single lines
Usage: ``./gradlew :composite-parsers:run --args="<caruna invoice>.pdf <helen-invoice>.pdf"`` Usage: ``./gradlew :composite-parsers:run --args="<caruna invoice>.pdf <helen-invoice>.pdf"``
### Plugsurfing Invoice Parser
PDF parser for Plugsurfing monthly electricity bills
Usage: ``./gradlew :plugsurfing-invoice:run --args="/path/to/pdf"``

View File

@ -1,7 +1,6 @@
package com.devsoap.parsers.composites; package com.devsoap.parsers.composites;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashSet; import java.util.HashSet;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors; import java.util.stream.Collectors;

View File

@ -35,17 +35,6 @@ public class Parser {
public int nightEnergy = 0; public int nightEnergy = 0;
public double dayEnergyEur = 0.0; public double dayEnergyEur = 0.0;
public double nightEnergyEur = 0.0; public double nightEnergyEur = 0.0;
/**
 * Returns a textual representation of this period, listing the basic pay, the
 * day/night energy amounts and the day/night energy costs as {@code name=value} pairs.
 */
@Override
public String toString() {
    final String fields = String.join(", ",
            "basicPay=" + basicPay,
            "dayEnergy=" + dayEnergy,
            "nightEnergy=" + nightEnergy,
            "dayEnergyEur=" + dayEnergyEur,
            "nightEnergyEur=" + nightEnergyEur);
    return "Period{" + fields + '}';
}
} }
public static void main(String[] args) { public static void main(String[] args) {

View File

@ -0,0 +1 @@
application.mainClass='com.devsoap.parsers.plugsurfing.Parser'

View File

@ -0,0 +1,101 @@
package com.devsoap.parsers.plugsurfing;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.NumberFormat;
import java.text.ParseException;
import java.time.Duration;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.regex.Pattern;
/**
 * Command-line parser for Plugsurfing monthly charging invoices (PDF).
 *
 * <p>Extracts every charging session from the invoice text, groups the sessions by
 * calendar month and prints one CSV summary row per month to standard output.
 */
public class Parser {

    /** Matches a session header line, e.g. {@code 2021-05-01 12:34 (01:02:03, 12.3kWh)}. */
    private static final Pattern DATE_TIME_KWH_DURATION_PATTERN = Pattern.compile("(\\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}) \\((.*), (.*)kWh\\)");

    /** Matches a billing line: quantity, unit price, tax percentage and total amount in euros. */
    private static final Pattern QUANTITY_PATTERN = Pattern.compile("(\\d+) (\\d+,\\d+) (\\d+)% (\\d+,\\d+) €");

    // Language first, then country. (The Locale constructor normalises case, so this is
    // the same locale as before, just written in the documented argument order.)
    private static final Locale FI_LOCALE = new Locale("fi", "FI");
    private static final DateTimeFormatter FI_DATE = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm").withLocale(FI_LOCALE);
    private static final DateTimeFormatter FI_TIME = DateTimeFormatter.ofPattern("HH:mm:ss").withLocale(FI_LOCALE);

    /** Formats a session timestamp into the month key ({@code year-month}) used for grouping. */
    private static final DateTimeFormatter KEY_FORMATTER = DateTimeFormatter.ofPattern("yyyy-M");

    /**
     * Parses amounts written with a decimal comma (e.g. {@code 12,34}).
     * NumberFormat instances are not synchronized; this parser is only used single-threaded.
     */
    private static final NumberFormat COMMA_DECIMAL_AMOUNT = NumberFormat.getNumberInstance(Locale.FRANCE);

    /**
     * Entry point: parses the invoice given as the first argument and prints a CSV
     * summary (header plus one row per month) to standard output.
     *
     * @param args command-line arguments; {@code args[0]} is the path to the invoice PDF
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Usage: ./gradlew :plugsurfing-invoice:run --args=\"/path/to/pdf\"");
            System.exit(1);
        }
        var file = Paths.get(args[0]);
        System.out.println("Vuosi,Kuukausi,Latauksia,Perusmaksu(€),Total(€),Energia(kwh)");
        parse(file).forEach((month, sessions) -> System.out.println(sessions.stream()
                .reduce(Session::add)
                .map(total -> toCsvRow(month, total))
                .orElseThrow()));
    }

    /** Renders the aggregated totals of one month as a CSV row. */
    private static String toCsvRow(String month, Session total) {
        var yearAndMonth = month.split("-");
        // Locale.ROOT keeps '.' as the decimal separator; a decimal comma from the
        // default locale would otherwise break the comma-separated columns.
        return String.format(Locale.ROOT, "%s,%s,%d,%.02f,%.02f,%.02f",
                yearAndMonth[0], yearAndMonth[1],
                total.quantity, total.unitPrice, total.amountEur, total.kwh);
    }

    /** A single charging session read from the invoice, or the running total of several. */
    public static class Session {
        public LocalDateTime timestamp;
        public Duration duration;
        public int quantity = 0;
        public double unitPrice = 0;
        public double taxRate = 0;
        public double amountEur = 0;
        public double kwh;

        /**
         * Accumulates another session into this one and returns this instance.
         * Durations, quantities, unit prices, amounts and energy are summed; the tax
         * rate becomes the larger of the two. The timestamp is left untouched.
         *
         * @param session the session to add; its {@code duration} must be non-null
         * @return this session, now holding the combined totals
         */
        public Session add(Session session) {
            this.duration = this.duration.plus(session.duration);
            this.quantity += session.quantity;
            // Add the other session's unit price (previously this doubled our own value).
            this.unitPrice += session.unitPrice;
            this.taxRate = Math.max(this.taxRate, session.taxRate);
            this.amountEur += session.amountEur;
            this.kwh += session.kwh;
            return this;
        }
    }

    /**
     * Reads all charging sessions from the given invoice PDF.
     *
     * <p>A session starts at a line matching the header pattern (timestamp, duration,
     * kWh) and is completed by the next line matching the billing pattern (quantity,
     * unit price, tax, amount). Header lines without a following billing line are dropped.
     *
     * @param file path to the invoice PDF
     * @return sessions grouped by month key ({@code year-month}), in order of first appearance
     * @throws RuntimeException wrapping the {@link IOException} or {@link ParseException}
     *         raised while reading the PDF or parsing an amount
     */
    public static Map<String, List<Session>> parse(Path file) {
        // Insertion-ordered so the months are reported in the order the invoice lists them.
        var sessionsByMonth = new LinkedHashMap<String, List<Session>>();
        try (var reader = new PdfReader(file.toFile());
             var document = new PdfDocument(reader)) {
            // Declared outside the page loop: a session header at the bottom of one page
            // may be completed by a billing line at the top of the next.
            Session session = null;
            for (var pageIndex = 1; pageIndex <= document.getNumberOfPages(); pageIndex++) {
                var text = PdfTextExtractor.getTextFromPage(document.getPage(pageIndex));
                var scanner = new Scanner(text);
                while (scanner.hasNextLine()) {
                    var line = scanner.nextLine();

                    var header = DATE_TIME_KWH_DURATION_PATTERN.matcher(line);
                    if (header.find()) {
                        session = new Session();
                        session.timestamp = LocalDateTime.parse(header.group(1), FI_DATE);
                        // The duration is printed as HH:mm:ss, so it is read as a time of
                        // day and measured from midnight (values of 24h or more cannot be parsed).
                        session.duration = Duration.between(LocalTime.MIDNIGHT, LocalTime.parse(header.group(2), FI_TIME));
                        session.kwh = Double.parseDouble(header.group(3));
                        continue;
                    }

                    if (session == null) {
                        continue;
                    }

                    var billing = QUANTITY_PATTERN.matcher(line);
                    if (billing.find()) {
                        session.quantity = Integer.parseInt(billing.group(1));
                        session.unitPrice = COMMA_DECIMAL_AMOUNT.parse(billing.group(2)).doubleValue();
                        session.taxRate = Double.parseDouble(billing.group(3)) / 100.0;
                        session.amountEur = COMMA_DECIMAL_AMOUNT.parse(billing.group(4)).doubleValue();
                        sessionsByMonth
                                .computeIfAbsent(KEY_FORMATTER.format(session.timestamp), key -> new ArrayList<>())
                                .add(session);
                        // The session is complete; wait for the next header line.
                        session = null;
                    }
                }
            }
        } catch (IOException | ParseException e) {
            throw new RuntimeException(e);
        }
        return sessionsByMonth;
    }
}

View File

@ -2,4 +2,5 @@ rootProject.name = 'pdf-parsers'
include 'caruna-invoice' include 'caruna-invoice'
include 'helen-invoice' include 'helen-invoice'
include 'composite-parsers' include 'composite-parsers'
include 'plugsurfing-invoice'