From 9e883631ecddfe1e116ea8afb97c95031c4c3714 Mon Sep 17 00:00:00 2001 From: John Ahlroos Date: Tue, 20 Apr 2021 13:00:12 +0300 Subject: [PATCH] Add Helen parser --- README.md | 11 +- .../com/devsoap/parsers/caruna/Parser.class | Bin 6652 -> 6640 bytes .../parsers/{caruna => helen}/Parser.java | 8 +- helen-invoice/build.gradle | 1 + .../devsoap/parsers/helen/Parser$Period.class | Bin 0 -> 1224 bytes .../com/devsoap/parsers/helen/Parser.class | Bin 0 -> 8474 bytes .../compileJava/source-classes-mapping.txt | 3 + .../com/devsoap/parsers/helen/Parser.java | 126 ++++++++++++++++++ settings.gradle | 1 + 9 files changed, 142 insertions(+), 8 deletions(-) rename caruna-invoice/src/main/java/com/devsoap/parsers/{caruna => helen}/Parser.java (96%) create mode 100644 helen-invoice/build.gradle create mode 100644 helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser$Period.class create mode 100644 helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser.class create mode 100644 helen-invoice/build/tmp/compileJava/source-classes-mapping.txt create mode 100644 helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java diff --git a/README.md b/README.md index 7e6b0b4..dd60e8f 100644 --- a/README.md +++ b/README.md @@ -5,4 +5,13 @@ This project contains different PDF parsers for my personal use. Feel free to co ### Caruna Invoice Parser PDF parser to parse Caruna invoices. -Usage: ``./gradlew :caruna-invoice:run --args="/path/to/pdf"`` \ No newline at end of file +Usage: ``./gradlew :caruna-invoice:run --args="/path/to/pdf"`` + + +### Helen Invoice Parser +PDF parser to parse Helen invoices. + +Usage: ``./gradlew :helen-invoice:run --args="/path/to/pdf"`` + + + diff --git a/caruna-invoice/build/classes/java/main/com/devsoap/parsers/caruna/Parser.class b/caruna-invoice/build/classes/java/main/com/devsoap/parsers/caruna/Parser.class index ab92d8dee796a80492e1d082901c77c97a60026f..3d50a8a273151a0899f78289d7b779367d920db7 100644 GIT binary patch delta 557 zcmXw$OGp%86otQYXB&K=AaWI05Y@6awJGRkwz?npaR2|Ddw(^4Yn-|kEqXzSB-oq$&FDz@ zd%W;Dsiye!kLYJ9k}oY!L=|9%l6c!Y?@^vHe<8AU#;_eB%lINh*bV|EH~cF z9S(4p6puK_E3Zqm_*5$&IK)@d{G?4`4l6=NyEv*8S!KwngJT+^T{q~^1f9A^mnP}f zGkP>juinw875epsllskom2k?c8MNJ;w!NIO6lbk_ob%Sp1si6_?t6X6uuU@J0UB(E zD{g$m=D27to#(k^3tYC>jJkbP>Z@2HtYWPr;dZlJb(0A|djm|KwkZXHmazrA)4)+%2h!jEjFl__rkQ=fGSl*5pC9|xKlAl ztVao_2s!IiQq`2uZpT&Qy)wEit6HZhCoH3lu2#^WwzM{Rv(l6QOA;q(kF{A_2zc_K mwrZQx!Yb7f_EOk}J8 delta 569 zcmXw%O-Phc7=@qnj-z8Xsf~_){`9Mviy$y6j3~8)DN|E4P1CH5GA*@5A(4xqMdWH~ z+opE52n`9swV;biV`@=Bn`{x~HVEp?Y;_*a%f0uUd+$osuc~L)W0|i>rx@&g@>Xmh z=Vy>HLrrxs#qJ=OVgK~^*cU0BjO5iM-aTj`GGeMLnNL@Reh%aGc*XD2Efur%`)pR)UjirA1w|YM3_N zpef4Zb*_-0mvdH%w!f-BzwP5@wIxP|T<49I`>>dH)RR?2_W{MX0xa z6{wI*5_V33Qx*-@t0Jc;P1dbqm5@z~b*j|8T-vN%o19{FSgSUxq=9_8tXW&!Eb!+4 yngkTvQ*G5YLf$-0+g0YYv`XcK{(c1QPz8`?Z?)6I$e(v1dB3n)W7BhmmHGplQE6=e diff --git a/caruna-invoice/src/main/java/com/devsoap/parsers/caruna/Parser.java b/caruna-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java similarity index 96% rename from caruna-invoice/src/main/java/com/devsoap/parsers/caruna/Parser.java rename to caruna-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java index 285b003..1ad5363 100644 --- a/caruna-invoice/src/main/java/com/devsoap/parsers/caruna/Parser.java +++ b/caruna-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java @@ -1,25 +1,19 @@ -package com.devsoap.parsers.caruna; +package com.devsoap.parsers.helen; import com.itextpdf.kernel.pdf.PdfDocument; -import com.itextpdf.kernel.pdf.PdfPage; import com.itextpdf.kernel.pdf.PdfReader; import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor; import java.io.FileNotFoundException; import java.io.IOException; -import java.nio.file.Path; import java.nio.file.Paths; -import java.text.DateFormat; -import java.text.ParseException; import java.time.LocalDate; import java.time.format.DateTimeFormatter; import java.time.format.FormatStyle; import java.time.format.TextStyle; -import java.util.Date; import java.util.Locale; import java.util.Objects; import java.util.Scanner; -import java.util.regex.Matcher; import java.util.regex.Pattern; public class Parser { diff --git a/helen-invoice/build.gradle b/helen-invoice/build.gradle new file mode 100644 index 0000000..f1d9f1e --- /dev/null +++ b/helen-invoice/build.gradle @@ -0,0 +1 @@ +application.mainClass='com.devsoap.parsers.helen.Parser' diff --git a/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser$Period.class b/helen-invoice/build/classes/java/main/com/devsoap/parsers/helen/Parser$Period.class new file mode 100644 index 0000000000000000000000000000000000000000..7a6c516e75ee55e24b90d972ab1105334e7cc8db GIT binary patch literal 1224 zcma)5TTc^F5dID=y|EUoC>Is07HBK7-fu-8XdyJI*aU(P`f|EGX&06~&F(1)G5!yK zh9)H@Jop3rQN}sD6&5Ax!_Mr?H)p=t+3$S+ar_Oy3SML|f>;u91EWYVOziRlZaUoU zm~UFU!cq*072CDdD~4FHw39{(=_E1+#$YfkT5{iPivwTsp4sD`FFfDu3P-qRGl&XJ z;n}jykZN(?wwnBqfz@=EtbuV%Fr?f3aLpB7MZz9*{EwYYj~t1i9yio42Zm?_rk^;)geIQ{Z1 z<=Jkh!Vs^@wqO`<*sj>@@3(}v&07v-atXNP;Xj)%#(>e|$>)cDV(5J5`Gkc?rn zc{RDSWjdeU(IsA^$H{`nju8DqnfL_gbi~Z2RL0EM;&P z%So7eQTG@Mr)#j?1Gy*6aKLbP>)cY(qd)bj7}?jbFl@TP-L@nALPN^Eey!hWe7ZJpL|%I0FhAi9x-|Y=E`5e%Go0%%dsO|(B`5JC$*XS5}dn1b`}Y;1;jCr zG!~G>B3;5Frct61U9G7`c9Z2Xvck_L_D$+L*X zOX9={ZsN2~(j<1;bct!pEm~*K^K2%YV4Kq!w^ zPbL*l4u)W&%s{z`3RDW_YF^2>6B&pn>`1qj8A_AOpiQPtwVhLvxVxJUOdqOqhw^AN z8K|O+QD-C)&)CN@6elulr;>IeqMF^Yfjzbrvs0y*hxrDsF>x&x2&$$_!?S0ccFM-H z)OxHatr}?xVGb55rCleO;|ZIatLrncSaAI;BD6Zu?1-Jrgs=!p6`5+m4Yi%K>~Mu{ zs_Ro*mYcX9D+C6W)ol$@z!kL}b+c_(t0?phxY0nZi8`zltiKYwsFfVE(veXsm9|q# zW4+X4+p$c_ie{XYpn`1GP5 z1`bX@qGGr(0B*R`g8RM_JZO`7GUCldt_8#hP z?(J>c(mF`a2v3S(V@Ep}S zVKvEtY%-dOI}{?fqNx;n!SLN@;tuo+7ERHezF1I{vD3=U{7&lPW|Pv_ZX7Ui5O-4h zBUUClq|SM>e{IWw|b z;5Tpx35*y>nsC5H%k_d;iIbuR6v5VaQh}z!FdlVMRwB~tHJYPQJDqlvlqsYQWK3i+ zmh*SLr_+gA33V)`3WAehlgH~FNO~vCC!!f0GjSXz=<&q7T~0DHM20yh(#!Q=Qwi?F z{RSQ|@ov0FuyC3_6khgZj;qV0sC{o|Cw*(WMJ5;V5_@kB*R(_Eli5K$Bq(U_2;u|$ z4#Y!v5DzP){3*eWeyw@{JKb*T{`e3cG4Np%KaHOumQP9Q9j}}xZH=c#6V~xAYlQM{ z^Xt&_oheCO9zG#+j-2lMp%A(Y@T6OZFlyqk$0gUTGlrj9CRGe2+Q)A$932eI64agvPqeeui?Tj`9I%rLju zaCIu0uC*Y3Ng(w>{0ct}N{S~;{3?Ep0hY3L;ET*gkrB<1>cUllZKGUpMg^ z_|2RHXhpU<*~7|tN_7zBdU?p>8NIHx3WCp>_-#DJptD9t6UPNhb55I&eC-|t($_cR zjES>&nm$3e9?fR#j)CUGX=W*c7XRTqg!37G5*sJU`OoI9uVSh>uB19gyr^yUR^*Gb zEj~#>3G}Rq^SD4gObSG#b|q@NN{M_FXOKx@<#Q&!pf0J3wyw54w{6$5rzZXkU#0h3M8i?CS~jy?x5bs;GsjeLxfx$G@fY|@x+}xC z&5AN=mV5fUoZI9!g%oq}*Czf(6$Zu8gp;-n{A~_z_1SxeQqEC}ei_2c_f%aSByJ6Q}D2rjz_yK29~sj@Ff1-#CPyrDtOFFWbNGp$_H{T?u%dE z6BqFxCjJxOClJQ0<83P8R8%^YhrOk!{6NE^-&s8u)0=-};=h%w=H%wKY)VnRWYSGZ zwKN$Y9Ll(KIW7gJ6sis7zHx3Lh)J<20Ws)IE~pDO`c38et-tKb z_U$BAPv+AHV+Uhf4>tHds#&+X);HqK>xqVy)tp*cuRGOrtKDLnQYPiptik|YjIEwQ zN~==(n_5^c=u_$&cVC{p}1%J?mfMNB`%>WtYcs& zWRWS0WeJ1B8PKfaC+wJ;0OjtZEA93)=AWJ^AxucMDa&NJAascgU-v z$n>YhiSZ(~>QxY;^rq6cJ}VVheQ&TRGZg35y<8W!vy~7{q&#B9lPY@r zKAVHzK7!bYo4|Pn(sAic&s1yV(r=xEWUNFkR!i(`ia@aQC}FfHq3fey%q!@L<+GLH zxw@aiG8-PW*Al>ITdyl;a>+1zWC*0)0-3Pqb1|=KCP{;#M|h&T{PSLg$N2!uU%q_s zX<{~>-b({9i8fj4GP)8zlju~9?So*B7g?!6CCLo7Abt)ol9M{SR6y3FD{QS$7p8mz ztZ9^U%yR|Ff9j#F^L^&{XwGlCeH!vKu*n_roBbWV%0v97Ej&?~zI#|KYu7Ynt06n6 zA$4h7;ibua94Sj!BZp(w3gw+E)(N)EfL~WFNFDt+;M!T>KKaZR6*>8tCP&jOmhxn| zS#U&W9{&Ty{3Id>{XtkQ<_Wq~mxg+r zY$|HEYws;}pE(*-vO_rO;GHE`pjqb8DLk!=FfE)_R-C`MXm^F<= zS-tBlNqcTQIpz%85f``IGN)a?%Q1_1%&$mqp-XhTm5e3q^omZ$8P1M2&7huX9Ndk) z45=BG`=fe>tia>lVkHtij16AG4B-M19&IorLlQUSh$+L8$UPnSkJhQfh3u_?+`&pm z{JpRI-ybtid6(-TZ_?GQUqyM-dapm3yIeJu!Yhk!zf=03L7(0b)yJ2*4!OsaJ7jOA z`p;`2udMY}UM;yXMee96eX_4o=s^R1LA<;1>appnSMnhDD$e(r(kpw}dqDNxZA!P? zUMUY=_L_0!Gxc%n`_$HlOxY=&mBM!wvqL>1wxL4Rv4kvOQCEb8>amlpg{q!ZUzHl_KHuW_4YDlfuPhL`p1lZr%-B)9 z@*Lze8wIk0e>u&+^QDG=O?QFZAUCpCBDGTIZZ6sngT)!-yH!T#oWjp;o>TbPmQ{t% zp=1K3jYStQr$1cuEanQ%A>3Ge0gL*>#pkf3F%S+Ek7HS*5jG}J(-^#fRsG@MIjn9h zIfFG->&LO-0yg)b!`8-7xMUo+J}!Og+uB%q2HVHc6)t@iy#;)Md4&CN={WW`nqhMs zcRh}yXAljWMdKJ8$M6L18CN6sUciI>Rqv~M|2do-#|InB!exbA^5{5zHe6N|E*r-u zi(~Ac$Ik(rH3QsV^@~-%JdV#y;MB(Qa41}M9=`>Q`g;S%or6u}D?yD5s3~QG$AynbSYC){;>+U}4Dsgz=kJSuDCCWhPwd05uU&D7FXg3mh62C+lNby|CEw_>iQ*!yHG*gxW zl*${jjjcj$r50(WmD;=@cR954SJT@#azDy1(HNzMY&RJ7VqhMl-M}J4Iyhipl|f6= z1hu+%=m)sB;714>IP?}iUT}%P5Xj#Ol6d(NPh#Ycb5u_S-$KDQ179cg*8`Oyg`^%x zeUi#4q(~iAZ&U`JLqUJx%AWqBRZrs^6Zm#de=%F%o4|kd^at4bp&m2XdRvbLS3NDF z#wMg_7vZIz=rRR#*C>r4gX{Q;znR}=jA1*DYHL@!Otn3^i1@xsx~R=&{`|a~dM%)k zyeT{(KSfFgS2ctK3$KekgYPkb0u=0I?eH`BAqVFdarA9>^lddN5)M4Y9WLa$lEi8A zBlNOiEa7h(t1zM^*+K=7vi2u7*itrB5Z$0XOCf)Qws4Vd?V)t)Tn*AgiIp;Wzj<=5 zr5C83MBR)1=Oj2KfeZSBnxhN)K~a>z{k)mI{THP_ z2mWVe&Ka3MA=jOeIpebQETYO4L<7|lZN6*ipkcm^E=Dy?a6OjtSL$kBlS>#V`?Snlawl7jd=|WmG-5KqkQ)(4Qo5CvtQ_;kPE3y7?~T1j-Yf5y4{*PFk-kXh OI?3}qEFY9d5dJX)>Ql!6 literal 0 HcmV?d00001 diff --git a/helen-invoice/build/tmp/compileJava/source-classes-mapping.txt b/helen-invoice/build/tmp/compileJava/source-classes-mapping.txt new file mode 100644 index 0000000..7b6ed76 --- /dev/null +++ b/helen-invoice/build/tmp/compileJava/source-classes-mapping.txt @@ -0,0 +1,3 @@ +com/devsoap/parsers/helen/Parser.java + com.devsoap.parsers.helen.Parser + com.devsoap.parsers.helen.Parser$Period diff --git a/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java b/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java new file mode 100644 index 0000000..d7726d9 --- /dev/null +++ b/helen-invoice/src/main/java/com/devsoap/parsers/helen/Parser.java @@ -0,0 +1,126 @@ +package com.devsoap.parsers.helen; + +import com.itextpdf.kernel.pdf.PdfDocument; +import com.itextpdf.kernel.pdf.PdfReader; +import com.itextpdf.kernel.pdf.canvas.parser.PdfTextExtractor; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.Paths; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.time.format.FormatStyle; +import java.time.format.TextStyle; +import java.util.HashMap; +import java.util.Locale; +import java.util.Objects; +import java.util.Scanner; +import java.util.function.Function; +import java.util.regex.Pattern; + +public class Parser { + + private static final Pattern PERUSMAKSU_PATTERN = Pattern.compile( + "perusmaksu (\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d)-(\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d).* (\\d*,\\d\\d) e"); + private static final Pattern ENERGIA_PATTERN = Pattern.compile( + "energia (\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d)-(\\d\\d?\\.\\d\\d?\\.\\d\\d\\d\\d) ([0-9 ]*) kWh (\\d+,\\d\\d) c"); + + + private static final Locale FI_LOCALE = new Locale("FI", "fi"); + private static final DateTimeFormatter FI_DATE = DateTimeFormatter + .ofLocalizedDate(FormatStyle.SHORT) + .withLocale(FI_LOCALE); + + + + static class Period { + double basicPay = 0.0; + int dayEnergy = 0; + int nightEnergy = 0; + double dayEnergyEur = 0.0; + double nightEnergyEur = 0.0; + + @Override + public String toString() { + return "Period{" + + "basicPay=" + basicPay + + ", dayEnergy=" + dayEnergy + + ", nightEnergy=" + nightEnergy + + ", dayEnergyEur=" + dayEnergyEur + + ", nightEnergyEur=" + nightEnergyEur + + '}'; + } + } + + public static void main(String[] args) { + var filename = args[0]; + var daySiirtoKwh = 937; + var nightSiirtoKwh = 920; + + var file = Paths.get(filename); + try(var reader = new PdfReader(file.toFile())) { + var document = new PdfDocument(reader); + var page2 = document.getPage(2); + var text = PdfTextExtractor.getTextFromPage(page2); + + var scanner = new Scanner(text); + + System.out.println("Kuukausi,Perusmaksu (energia),Perusmaksu (siirto),Päiväenergia (kWh),Päiväenergia " + + "(EUR),Yöenergia (kWh),Yöenergia (EUR),Päiväsiirto (kWh),Päiväsiirto (EUR),Yösiirto (kWh)" + + ",Yösiirto (EUR),Vero"); + + var periods = new HashMap(); + + while(scanner.hasNextLine()) { + var line = scanner.nextLine(); + if(PERUSMAKSU_PATTERN.asPredicate().test(line)) { + var matcher = PERUSMAKSU_PATTERN.matcher(line); + while (matcher.find()) { + var month = LocalDate.from( FI_DATE.parse(matcher.group(1))) + .getMonth().getDisplayName(TextStyle.FULL, new Locale("FI","fi")); + month = month.substring(0,1).toUpperCase() + month.substring(1, month.length()-2); + var basicPay = Double.parseDouble(matcher.group(3).replace(",", ".")); + + periods.computeIfAbsent(month, s -> new Period()); + periods.computeIfPresent(month, (s,p) -> { + p.basicPay =basicPay; + return p; + }); + } + } else if(ENERGIA_PATTERN.asPredicate().test(line)) { + var matcher = ENERGIA_PATTERN.matcher(line); + while (matcher.find()) { + var month = LocalDate.from( FI_DATE.parse(matcher.group(1))) + .getMonth().getDisplayName(TextStyle.FULL, new Locale("FI","fi")); + month = month.substring(0,1).toUpperCase() + month.substring(1, month.length()-2); + + var totalEnergy = Integer.parseInt(matcher.group(3) + .replace(" ", "")); + var eurPerKwh = Double.parseDouble(matcher.group(4) + .replace(",", ".")) / 100.0; + + periods.computeIfAbsent(month, s -> new Period()); + periods.computeIfPresent(month, (s,p) -> { + p.dayEnergy = totalEnergy - nightSiirtoKwh; + p.dayEnergyEur = p.dayEnergy * eurPerKwh; + p.nightEnergy = totalEnergy - daySiirtoKwh; + p.nightEnergyEur = p.nightEnergy * eurPerKwh; + return p; + }); + } + } + } + + periods.forEach((month,period ) -> { + var csv = String.format("%s,%.02f,,%d,%.02f,%d,%.02f,,,,", month, period.basicPay, + period.dayEnergy, period.dayEnergyEur,period.nightEnergy, period.nightEnergyEur); + System.out.println(csv); + }); + + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } + } +} diff --git a/settings.gradle b/settings.gradle index b9d1d89..fba2639 100644 --- a/settings.gradle +++ b/settings.gradle @@ -1,3 +1,4 @@ rootProject.name = 'pdf-parsers' include 'caruna-invoice' +include 'helen-invoice'