1
0
Fork 0

Add Helen parser

This commit is contained in:
John Ahlroos 2021-04-20 13:00:12 +03:00
parent 5735323619
commit 9e883631ec
Signed by: john
GPG Key ID: 258D0F70DB84CD5D
9 changed files with 142 additions and 8 deletions

View File

@ -5,4 +5,13 @@ This project contains different PDF parsers for my personal use. Feel free to co
### Caruna Invoice Parser
PDF parser to parse Caruna invoices.
Usage: ``./gradlew :caruna-invoice:run --args="/path/to/pdf"``
Usage: ``./gradlew :caruna-invoice:run --args="/path/to/pdf"``
### Helen Invoice Parser
Parses a Helen invoice PDF and prints the monthly basic fee and energy figures as CSV to standard output.
Usage: ``./gradlew :helen-invoice:run --args="/path/to/pdf"``

View File

@ -1,25 +1,19 @@
package com.devsoap.parsers.caruna;
package com.devsoap.parsers.helen;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.text.DateFormat;
import java.text.ParseException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.FormatStyle;
import java.time.format.TextStyle;
import java.util.Date;
import java.util.Locale;
import java.util.Objects;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Parser {

View File

@ -0,0 +1 @@
// Fully-qualified entry-point class of the Helen invoice parser module.
// NOTE(review): presumably consumed by the Gradle application plugin's `run` task
// (the plugin is not applied in this file) — confirm where it is configured.
application.mainClass='com.devsoap.parsers.helen.Parser'

View File

@ -0,0 +1,3 @@
com/devsoap/parsers/helen/Parser.java
com.devsoap.parsers.helen.Parser
com.devsoap.parsers.helen.Parser$Period

View File

@ -0,0 +1,126 @@
package com.devsoap.parsers.helen;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Paths;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.FormatStyle;
import java.time.format.TextStyle;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Scanner;
import java.util.function.Function;
import java.util.regex.Pattern;
/**
 * Command-line tool that reads page 2 of a Helen invoice PDF and prints one CSV row per
 * month to standard output.
 *
 * <p>Only the "perusmaksu" and "energia" invoice lines are parsed; the remaining CSV columns
 * declared in the header (siirto and vero columns) are emitted empty.
 */
public class Parser {

    /**
     * Matches a "perusmaksu" line. Groups: 1 = period start date, 2 = period end date,
     * 3 = amount with a decimal comma (the line ends in " e").
     */
    private static final Pattern PERUSMAKSU_PATTERN = Pattern.compile(
            "perusmaksu (\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d)-(\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d).* (\\d*,\\d\\d) e");

    /**
     * Matches an "energia" line. Groups: 1 = period start date, 2 = period end date,
     * 3 = kWh amount (digits, possibly space-grouped), 4 = unit price with a decimal comma
     * (followed by " c"; the value is divided by 100 before use).
     */
    private static final Pattern ENERGIA_PATTERN = Pattern.compile(
            "energia (\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d)-(\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d) ([0-9 ]*) kWh (\\d+,\\d\\d) c");

    // Locale normalises case, so this is the same locale the original ("FI", "fi") produced.
    private static final Locale FI_LOCALE = new Locale("fi", "FI");

    /** Parses the short Finnish date form captured by the patterns above. */
    private static final DateTimeFormatter FI_DATE = DateTimeFormatter
            .ofLocalizedDate(FormatStyle.SHORT)
            .withLocale(FI_LOCALE);

    /** Values collected for a single month; every field starts at zero. */
    static class Period {
        /** Amount parsed from the "perusmaksu" line. */
        double basicPay = 0.0;
        /** Total kWh of the "energia" line minus the hard-coded night siirto kWh. */
        int dayEnergy = 0;
        /** Total kWh of the "energia" line minus the hard-coded day siirto kWh. */
        int nightEnergy = 0;
        /** {@link #dayEnergy} multiplied by the parsed unit price. */
        double dayEnergyEur = 0.0;
        /** {@link #nightEnergy} multiplied by the parsed unit price. */
        double nightEnergyEur = 0.0;

        @Override
        public String toString() {
            return "Period{" +
                    "basicPay=" + basicPay +
                    ", dayEnergy=" + dayEnergy +
                    ", nightEnergy=" + nightEnergy +
                    ", dayEnergyEur=" + dayEnergyEur +
                    ", nightEnergyEur=" + nightEnergyEur +
                    '}';
        }
    }

    /**
     * Parses the invoice PDF given as the first argument and prints the CSV header followed by
     * one row per month, in the order the months first appear on the invoice page.
     *
     * @param args {@code args[0]} is the path to the invoice PDF
     * @throws IllegalArgumentException if no path argument is supplied
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            throw new IllegalArgumentException("Missing argument: path to the invoice PDF");
        }
        var filename = args[0];
        // NOTE(review): hard-coded kWh figures; presumably the day/night transfer ("siirto")
        // consumption taken from a separate invoice — confirm before reusing for other invoices.
        var daySiirtoKwh = 937;
        var nightSiirtoKwh = 920;

        var file = Paths.get(filename);
        // Both the reader and the document are closed when the block exits.
        try (var reader = new PdfReader(file.toFile());
             var document = new PdfDocument(reader)) {

            if (document.getNumberOfPages() < 2) {
                System.err.println("Expected at least 2 pages in " + file
                        + " but found " + document.getNumberOfPages());
                return;
            }
            var text = PdfTextExtractor.getTextFromPage(document.getPage(2));

            System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " +
                    "(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" +
                    ",Yösiirto (EUR),Vero");

            // Insertion-ordered so rows come out in invoice order instead of hash order.
            Map<String, Period> periods = new LinkedHashMap<>();
            try (var scanner = new Scanner(text)) {
                while (scanner.hasNextLine()) {
                    var line = scanner.nextLine();
                    // A line is treated as an energy line only when it is not a basic-fee line.
                    if (!applyBasicFee(line, periods)) {
                        applyEnergy(line, periods, daySiirtoKwh, nightSiirtoKwh);
                    }
                }
            }

            periods.forEach((month, period) -> {
                // Locale.ROOT keeps '.' as the decimal separator; a decimal comma from the
                // default locale would be indistinguishable from the CSV field separator.
                var csv = String.format(Locale.ROOT, "%s,%.02f,,%d,%.02f,%d,%.02f,,,,", month, period.basicPay,
                        period.dayEnergy, period.dayEnergyEur, period.nightEnergy, period.nightEnergyEur);
                System.out.println(csv);
            });
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so a single handler covers both cases.
            System.err.println("Could not read PDF " + file + ": " + e);
        }
    }

    /** Stores the basic fee of every "perusmaksu" match in {@code line}; returns whether any matched. */
    private static boolean applyBasicFee(String line, Map<String, Period> periods) {
        var matcher = PERUSMAKSU_PATTERN.matcher(line);
        var matched = false;
        while (matcher.find()) {
            matched = true;
            var month = monthLabel(matcher.group(1));
            var basicPay = Double.parseDouble(matcher.group(3).replace(",", "."));
            periods.computeIfAbsent(month, key -> new Period()).basicPay = basicPay;
        }
        return matched;
    }

    /** Stores the day/night energy figures of every "energia" match in {@code line}; returns whether any matched. */
    private static boolean applyEnergy(String line, Map<String, Period> periods,
                                       int daySiirtoKwh, int nightSiirtoKwh) {
        var matcher = ENERGIA_PATTERN.matcher(line);
        var matched = false;
        while (matcher.find()) {
            matched = true;
            var month = monthLabel(matcher.group(1));
            // The kWh amount may be space-grouped (e.g. "1 857"), so drop the spaces first.
            var totalEnergy = Integer.parseInt(matcher.group(3).replace(" ", ""));
            var eurPerKwh = Double.parseDouble(matcher.group(4).replace(",", ".")) / 100.0;

            var period = periods.computeIfAbsent(month, key -> new Period());
            period.dayEnergy = totalEnergy - nightSiirtoKwh;
            period.dayEnergyEur = period.dayEnergy * eurPerKwh;
            period.nightEnergy = totalEnergy - daySiirtoKwh;
            period.nightEnergyEur = period.nightEnergy * eurPerKwh;
        }
        return matched;
    }

    /** Returns the capitalised Finnish month name for a date in the {@link #FI_DATE} format. */
    private static String monthLabel(String finnishDate) {
        var name = LocalDate.from(FI_DATE.parse(finnishDate))
                .getMonth().getDisplayName(TextStyle.FULL, FI_LOCALE);
        // Drop the trailing "ta" when the name carries it (e.g. "tammikuuta"); checking the
        // suffix avoids truncating a name that is already in its base form.
        if (name.endsWith("ta")) {
            name = name.substring(0, name.length() - 2);
        }
        return name.substring(0, 1).toUpperCase(FI_LOCALE) + name.substring(1);
    }
}

View File

@ -1,3 +1,4 @@
// Root project name, followed by one `include` per invoice-parser module in the build.
rootProject.name = 'pdf-parsers'
include 'caruna-invoice'
include 'helen-invoice'