Attribute VB_Name = "ImportPDF"

' (c) Dan Elgaard (www.EXCELGAARD.dk)
'https://pdftables.com/api
'api= bntx9fduvm8r 'older one is - 7syvkone4oal
' Public constants for accessing the PDFTables website
  Public Const URL_Purchase_Pages As String = "https://pdftables.com/pricing"
  Public Const URL_Obtain_API_Key As String = "https://pdftables.com/pdf-to-excel-api"

' Module level constants
  Private Const URL_API_Remaining  As String = "https://pdftables.com/api/remaining?key=!KEY!" '7syvkone4oal" '!KEY!"
  Private Const URL_API_PDF2Excel  As String = "https://pdftables.com/api?key=!KEY!&format=xlsx-single" '7syvkone4oal&format=xlsx-single" '!KEY!&format=xlsx-single"

' Private Const HTTPRequest_Client As String = "Microsoft.XMLHTTP"            ' Faster, but gets corrupted more often, and will cache requests, thus cannot be used for fetching remaining pages.
  Private Const HTTPRequest_Client As String = "WinHttp.WinHttpRequest.5.1"   ' Slower, but more robust, and will not cache requests

' Public variable to test, if we were successful in converting/importing from PDF document to Excel worksheet
  Public PDFTables2ActiveSheet_Success As Boolean

' API Function
  #If VBA7 Then
      Private Declare PtrSafe Function DownloadURLToFile Lib "URLMon.DLL" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
  #Else
      Private Declare Function DownloadURLToFile Lib "URLMon.DLL" Alias "URLDownloadToFileA" (ByVal pCaller As Long, ByVal szURL As String, ByVal szFileName As String, ByVal dwReserved As Long, ByVal lpfnCB As Long) As Long
  #End If

Option Explicit
Sub PDF2Workbook()

      Application.Run "PDFTables2Workbook", , True

End Sub

Sub ShowPagesLeft()

      MsgBox PDFTablesPages, vbOKOnly + vbInformation, " PDFTables"

End Sub
Sub PDFTables2Workbook(Optional ByVal InitialFolderFile As String = vbNullString, Optional ByVal AllowMultiSelect As Boolean = False)

' This macro will allow you to select PDF documents to be converted into Excel, using online API from pdftables.com.
'
' For this to work, these things MUST be in order:
'
' - The workbook must have VB Reference to 'Microsoft Office Object Library' (MSOL)
' - There must an active working Internet connection available, when using this macro.
' - You must be a registered user at pdftables.com and have a key for their API (you can try it out for free, for the first 50 pages)
'
' The argument, 'InitialFolderFile', can be either a file to open or a folder to open from.
' The argument, 'AllowMultiSelect', can be used to allow the user to convert more than one PDF at the same time.
'
' The macro will create (and open) a new workbook for each converted PDF document.


' * ' Initialize
      On Error Resume Next

      If Len(PDFTablesKey) <> 12 Then Exit Sub        ' No key found/given
      If PDFTablesPages(5) < 1 Then Exit Sub          ' No pages on the account


' * ' Define variables
      Dim SelectedItem As Variant

      Dim FDO As Office.FileDialog                    ' File Dialog Object
      Set FDO = Application.FileDialog(msoFileDialogFilePicker)


' * ' Request PDF file(s) to convert into Excel
      With FDO
            .InitialFileName = InitialFolderFile
            .AllowMultiSelect = AllowMultiSelect
            .InitialView = msoFileDialogViewList

            .Filters.Clear
            .Filters.Add "PDF Documents", "*.PDF"     ' Make sure you can only pick PDF documents

            If .Show = -1 Then
                  For Each SelectedItem In .SelectedItems
                        Application.Run "PDFTables2Excel_CreateExcelFile", SelectedItem, True
                  Next
            End If
      End With


ES: ' End of Sub
      Set FDO = Nothing
      If IsArray(SelectedItem) Then Erase SelectedItem

End Sub
Sub PDFTables2ActiveSheet(Optional ByVal PDFFile As String = vbNullString)

' This macro will convert/import a PDF document into the active worksheet, using the pdftables.com API.
'
' If you don't provide a PDF document, as argument, or, if the given PDF document can not be found, the user is asked for at PDF document, using a standard file dialog.
'
' The macro accepts PDF documents located on-line, on a 'http' URL, like "http://www.EXCELGAARD.dk/Files/PDFs/Extern%20Data.PDF"


' * ' Initialize
      On Error Resume Next

      PDFTables2ActiveSheet_Success = False

      If TypeName(ActiveSheet) <> "Worksheet" Then GoTo ES:                         ' Active sheet is not a worksheet (probably a chart :-)
      If Len(PDFTablesKey) <> 12 Then GoTo ES:                                      ' No key found/given
      If PDFTablesPages(5) < 1 Then GoTo ES:                                        ' No pages on the account


' * ' Define variables
      Dim TempFile As String
      If LCase$(Left$(PDFFile, 4)) = "http" Then                                    ' An online PDF document is given - try to download it
            TempFile = Environ("TMP")
            If TempFile = vbNullString Then TempFile = Environ("TMP")
            If TempFile = vbNullString Then TempFile = ThisWorkbook.Path
            If Right$(TempFile, 1) <> Application.PathSeparator Then TempFile = TempFile & Application.PathSeparator
            TempFile = TempFile & "TempFile.PDF"

            SetAttr TempFile, vbNormal
            Kill TempFile

            If DownloadURLToFile(0, PDFFile, TempFile, 16, 0) <> 0 Then GoTo ES:    ' Download of online PDF document failed
            PDFFile = TempFile
      End If

      If Len(PDFFile) < 2 Or Len(Dir(PDFFile, vbHidden + vbSystem)) < 2 Then        ' If no PDF document is given, then ask for one
            Application.ScreenUpdating = True
            Application.Interactive = True
            Application.Cursor = xlDefault
            PDFFile = Application.GetOpenFilename("PDF (*.PDF), *.PDF")             ' Ask for PDF file to convert/import
            If UCase$(PDFFile) = "FALSE" Then GoTo ES:                              ' User clicked [Cancel]
      End If


' * ' Convert PDF document into Excel workbook
      PDFFile = Application.Run("PDFTables2Excel_CreateExcelFile", PDFFile, False)  ' Create temporary Excel file
      If Dir(PDFFile) = vbNullString Then GoTo ES:                                  ' Something went wrong...


' * ' Transfer converted PDF to active worksheet
      Call PDFTablesTransfer2Sheet(PDFFile)                                         ' Transfer to active worksheet


ES: ' End of Sub
      Kill PDFFile                                                                  ' Delete temporary file

End Sub
Private Sub PDFTablesTransfer2Sheet(ByVal ExcelFile As String)

' This macro will transfer the contents of the first worksheet in the given Excel workbook to the active worksheet.


' * ' Initialize
      On Error Resume Next


' * ' Define variables
      Dim HXL As Excel.Application                                                  ' HXL = Hidden Excel
      Set HXL = New Excel.Application

      With HXL                                                                      ' \
            .Visible = False                                                        '  \
            .EnableEvents = False                                                   '   > Set properties of hidden instance of Excel,
            .DisplayAlerts = False                                                  '   > so that it doesn't 'disturb' us while opened.
            .ScreenUpdating = False                                                 '  /
      End With                                                                      ' /

      Dim HWB As Excel.Workbook                                                     ' HWB = Hidden Workbook
      Set HWB = HXL.Workbooks.Open(ExcelFile)

      Dim HWS As Excel.Worksheet                                                    ' HWS = Hidden Worksheet
      Set HWS = HWB.Sheets(1)

      Dim DWS As Excel.Worksheet                                                    ' DWS = Destination Worksheet
      Set DWS = ActiveSheet


' * ' Prepare active worksheet
      DWS.Unprotect
      If DWS.ProtectContents = True Then GoTo ES:                                   ' Can not copy to a protected worksheet

      DWS.Select
      DWS.DisplayPageBreaks = False
      DWS.Cells.Delete

      Range("A1").Select


' * ' Copy contents of PDF to active worksheet
      Err.Clear
      HWS.Cells.Copy
      DWS.Paste
      If Err.Number = 0 Then
            PDFTables2ActiveSheet_Success = True
      Else
            PDFTables2ActiveSheet_Success = False
            Debug.Print "PDFTablesTransfer2Sheet : " & Err.Number & ", " & Err.Description
      End If


ES: ' End of Sub
      HWB.Saved = True                                                              ' In case of volatile functions in the hidden workbook
      HWB.Close SaveChanges:=False

      HXL.Quit                                                                      ' We can just quit the hidden instance of Excel, since we don't have any more workbooks opened in it

      Set HWS = Nothing                                                             ' \
      Set HWB = Nothing                                                             '  > Clean up object variables
      Set HXL = Nothing                                                             ' /
      Set DWS = Nothing                                                             '/

End Sub
Function PDFTablesKey(Optional ByVal DisplayWarning As VbTriState = vbUseDefault) As String

' This little User-Defined Function (UDF) allows you to set the key for your pdftables.com subscription.
'
' You can either hardcode the key into your project (not recommended), or
' you can set the key into an environment variable, or
' your can save the key to the Registry (recommended).
'
' If you don't have a key for using the PDFTables API, you can obtain one from https://pdftables.com/
'
' DisplayWarning  =  Default  =  Ask for key, if missing, using InputBox
'                 =  False    =  Don't display anything
'                 =  True     =  Display warning, but don't request a key


' * ' Initialize
      On Error Resume Next


' * ' Define variables
      Dim MsgBoxPrompt As String
      MsgBoxPrompt = "You must have an API key!" & vbNewLine & vbNewLine & "Please, get an API key at " & URL_Obtain_API_Key & vbNewLine & vbNewLine & "Visit website now?"

      Dim InputBoxPrompt As String
      InputBoxPrompt = "You must have an API key! " & vbNewLine & vbNewLine & "Enter key:"


' * ' Set the key to the function
      PDFTablesKey = ""                                                                   ' Insert your own key here, if you want to hardcode the key into the project (not recommended)
      If PDFTablesKey = "" Then PDFTablesKey = Environ("PDFTables Key")                   ' Key stored in an environment variable
      PDFTablesKey = GetSetting("Credentials", "PDFTables", "Token, Key", PDFTablesKey)   ' Key saved to Registry (recommended)


' * ' Request key
      If PDFTablesKey = "" And DisplayWarning = vbUseDefault Then
            PDFTablesKey = Application.InputBox(prompt:=InputBoxPrompt, Title:=" PDFTables", Type:=2)
            If Len(PDFTablesKey) <> 12 Then GoTo EF:
            SaveSetting "Credentials", "PDFTables", "Token, Key", PDFTablesKey
      End If


' * ' Display warning
      If Len(PDFTablesKey) <> 12 And DisplayWarning <> False Then
            If MsgBox(MsgBoxPrompt, vbYesNo + vbCritical, " PDFTables") = vbYes Then     ' Visit website to obtain a key
                  ActiveWorkbook.FollowHyperlink Address:=URL_Obtain_API_Key, NewWindow:=True, AddHistory:=True
            End If
      End If


EF: ' End of Function
      If Len(PDFTablesKey) <> 12 Then PDFTablesKey = vbNullString

End Function
Function PDFTablesPages(Optional ByVal DisplayWarningBelow As Long = -1, Optional ByVal AccountKey As String = vbNullString) As Long

' This little function will return the number of pages left on the subscribtion at pdftables.com


' * ' Initialize
      On Error Resume Next


' * ' Define variables
      Dim BodyText As String

      Dim MsgBoxPrompt As String
      MsgBoxPrompt = "There are !PAGES! pages left." & vbNewLine & vbNewLine & "You can purchase more pages at " & URL_Purchase_Pages & vbNewLine & vbNewLine & "Visit website now?"

      If Len(AccountKey) <> 12 Then AccountKey = PDFTablesKey
      If Len(AccountKey) <> 12 Then GoTo EF:

' * ' Read number of pages left
      With CreateObject(HTTPRequest_Client)
            .Open "GET", Replace(URL_API_Remaining, "!KEY!", AccountKey), False
            .send
            If .Status <> 200 Then
                If MsgBox(.responseText, vbOKOnly + vbCritical, "Error") = vbOK Then
                    Exit Function
                End If
            Else
                BodyText = .responseText
            End If
      End With


' * ' Display warning
      If Len(BodyText) > 0 And Val(BodyText) <= DisplayWarningBelow Then
            If MsgBox(Replace(MsgBoxPrompt, "!PAGES!", Val(BodyText)), vbYesNo + vbExclamation, " PDFTables") = vbYes Then
                  ActiveWorkbook.FollowHyperlink Address:=URL_Purchase_Pages, NewWindow:=True, AddHistory:=True
            End If
      End If


EF: ' End of Function
      PDFTablesPages = Val(BodyText)

End Function
Private Function PDFTables2Excel_CreateExcelFile(ByVal PDFFile As String, Optional ByVal OpenAfterCreation As Boolean = False) As String

' This function will convert a given PDF file into an Excel workbook, using the pdftables.com API.
'
' For this to work, these things MUST be in order:
'
' - The workbook must have VB Reference to 'Microsoft Office Object Library' (MSOL)
' - There must an active working Internet connection available, when using this macro.
' - You must be a registered user at pdftables.com and have a key for their API (you can try it out for free, for the first 50 pages)


' * ' Initialize
      On Error Resume Next


' * ' Define variables
      Dim ExcelFileData As Variant
      ExcelFileData = PDFTables2Excel_ConvertFile(Replace(URL_API_PDF2Excel, "!KEY!", PDFTablesKey), PDFFile)

      Dim DataArray() As Byte
      DataArray = ExcelFileData

      Dim FileNumber As Long
      FileNumber = InStrRev(PDFFile, ".")
      PDFFile = Left$(PDFFile, FileNumber) & "XLSx"
      FileNumber = -1
      FileNumber = InStrRev(PDFFile, Application.PathSeparator)
      If FileNumber > 0 Then PDFFile = Mid$(PDFFile, FileNumber + 1)
      PDFFile = UCase$(Left$(PDFFile, 1)) & Mid$(PDFFile, 2)

      Dim ExcelFile As String
      ExcelFile = CreateObject("Scripting.FileSystemObject").GetSpecialFolder(2)
      If Right$(ExcelFile, 1) <> Application.PathSeparator Then ExcelFile = ExcelFile & Application.PathSeparator
      ExcelFile = ExcelFile & PDFFile


' * ' Create temporary temporary Excel file
      FileNumber = FreeFile
      Open ExcelFile For Binary Lock Read Write As #FileNumber
            Put #FileNumber, , DataArray
      Close #FileNumber


EF: ' End of Function
      PDFTables2Excel_CreateExcelFile = ExcelFile                             ' Return the path of the created Excel file as result of the function

      If OpenAfterCreation = True Then Workbooks.Open (ExcelFile)

      Erase DataArray

End Function
Private Function PDFTables2Excel_ConvertFile(ByVal APIURL As String, ByVal PDFFile As String) As Variant

' This function will upload (post) the given PDF document to the given online API at pdftables.com
' and retrieve the converted Excel file back, and place the Excel file in the Windows temporary directory.


' * ' Initialize
      Const Boundary As String = "3fbd04f5Rb1edX4060q99b9Nfca7ff59c113"

      On Error Resume Next


' * ' Define variables
      Dim BufferArray() As Byte
      Dim BodyData As String

      Dim FileNumber As Long
      FileNumber = FreeFile


' * ' Read file
      Open PDFFile For Binary Access Read As #FileNumber
            If LOF(FileNumber) > 0 Then
                  ReDim BufferArray(0 To LOF(FileNumber) - 1) As Byte
                  Get FileNumber, , BufferArray
                  BodyData = StrConv(BufferArray, vbUnicode)
            End If
      Close #FileNumber


' * ' Prepare body
      BodyData = "--" & Boundary & vbCrLf & _
                 "Content-Disposition: form-data; name=""uploadfile""; filename=""" & Mid$(PDFFile, InStrRev(PDFFile, Application.PathSeparator) + 1) & """" & vbCrLf & _
                 "Content-Type: application/octet-stream" & vbCrLf & vbCrLf & BodyData & vbCrLf & _
                 "--" & Boundary & "--"


' * ' Post (upload) the PDF document, and retrieve the converted Excel file, and the .ResponseText of the URL
      With CreateObject(HTTPRequest_Client)
            .Open "POST", APIURL, False
            .setRequestHeader "Content-Type", "multipart/form-data; boundary=" & Boundary
            .send PDFTables2Excel_BodyElements2Bytes(BodyData)
            If .Status <> 200 Then
                If MsgBox(.responseText, vbOKOnly + vbCritical, "Error") = vbOK Then
                    Exit Function
                End If
            Else
                PDFTables2Excel_ConvertFile = .responseBody
            End If
      End With


EF: ' End of Function
      Erase BufferArray

End Function
Private Function PDFTables2Excel_BodyElements2Bytes(ByVal BodyData As String) As Byte()

' This sub-function converts the body string into an array (byte array)

      On Error Resume Next
      PDFTables2Excel_BodyElements2Bytes = StrConv(BodyData, vbFromUnicode)

End Function