Upload a document to DocuPanda and retrieve the parsed results
This guide demonstrates how to use DocuPanda's document parsing to extract text and structural information from documents. We'll walk through the process of uploading a document, monitoring the job, and retrieving the parsed results. This example is in Python, but the same concept applies for other programming languages.
Prerequisites
Before you begin, make sure you have:
- A DocuPanda API key
- Python 3 installed
- The requests library (pip install requests)
Authentication
Every request to DocuPanda needs to include an API key. You can obtain your API key by signing up and going to your account settings page.
Step 1: Upload a Document
First, we'll upload a document to DocuPanda for parsing. You can upload a document either by providing a file or a URL.
import base64
import requests
# Placeholders to fill in before running, plus the endpoint and the headers
# that are sent with every request.
API_KEY = "YOUR_API_KEY"
APP_URL = "https://app.docupanda.io"
DOC_PATH = "/path/to/your/doc"
DATASET_NAME = "YOUR_DATASET_NAME"
HEADERS = {
    "accept": "application/json",
    "content-type": "application/json",
    "X-API-Key": API_KEY,
}
def post_doc():
    """Upload the file at DOC_PATH to DocuPanda and start a parsing job.

    The file is read from disk, base64-encoded, and posted as JSON to the
    ``/document`` endpoint together with the dataset name.

    Returns:
        dict: ``{"job_id": ..., "doc_id": ...}`` taken from the ``jobId`` and
        ``documentId`` fields of the JSON response.

    Raises:
        OSError: If the file at DOC_PATH cannot be opened or read.
        requests.HTTPError: If the API answers with a status other than 200.
    """
    url = f"{APP_URL}/document"

    # Read inside a context manager so the file handle is closed promptly.
    with open(str(DOC_PATH), "rb") as doc_file:
        encoded_contents = base64.b64encode(doc_file.read()).decode()

    payload = {
        "document": {
            "file": {
                "contents": encoded_contents,
                "filename": "my_filename",  # optional
            },
            # Alternatively, you can use a URL:
            # "url": "INSERT_URL_HERE",
        },
        "dataset": DATASET_NAME,
    }
    response = requests.post(url, json=payload, headers=HEADERS)

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if response.status_code != 200:
        raise requests.HTTPError(
            f"Upload failed with HTTP {response.status_code}: {response.text}",
            response=response,
        )

    res_json = response.json()
    return {"job_id": res_json["jobId"], "doc_id": res_json["documentId"]}
# Start the upload; `response` keeps the job and document IDs for the next steps.
response = post_doc()
print(f"Job ID: {response['job_id']}")
print(f"Document ID: {response['doc_id']}")
Replace "YOUR_API_KEY" with your actual API key, "/path/to/your/doc" with the path to your document, and "YOUR_DATASET_NAME" with your desired dataset name (dataset names are optional and serve only to group documents together for organizational purposes).
Step 2: Check Job Status
DocuPanda processes documents asynchronously. We can check the status of a job using its ID. The job will be marked as "completed" when the parsing is finished.
import time
def is_job_done(job_id, max_attempts=60, poll_interval=3):
    """Poll a DocuPanda job until it finishes or the attempts run out.

    Args:
        job_id: Identifier of the job, as returned by the upload call.
        max_attempts: Maximum number of status requests to make.
        poll_interval: Seconds to sleep after each inconclusive status check.

    Returns:
        bool: True when the job status is "completed"; False when the status
        is "error" or the job has not finished after ``max_attempts`` polls.

    Raises:
        requests.HTTPError: If a status request returns anything but 200.
        requests.Timeout: If a single status request gets no answer in time.
    """
    url = f"{APP_URL}/job/{job_id}"
    for _ in range(max_attempts):
        # A per-request timeout keeps one stalled connection from defeating
        # the bounded number of attempts.
        response = requests.get(url, headers=HEADERS, timeout=30)

        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if response.status_code != 200:
            raise requests.HTTPError(
                f"Job status request failed with HTTP {response.status_code}: {response.text}",
                response=response,
            )

        status = response.json()["status"]
        if status == "completed":
            return True
        if status == "error":
            return False
        time.sleep(poll_interval)
    return False
# Wait for the job to finish; `success` is False if it errored or polling gave up.
success = is_job_done(response["job_id"])
print(f"Parsing completed: {success}")
Step 3: Retrieve Parsing Results
Once the job is complete, we can retrieve the parsed document results. The results include the full text of the document as well as more granular information, including text, bounding box, and type broken down by pages and sections.
def get_doc(doc_id):
    """Fetch a document, including its parsing results, from DocuPanda.

    Args:
        doc_id: Identifier of the document, as returned by the upload call.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a status other than 200.
    """
    url = f"{APP_URL}/document/{doc_id}"
    response = requests.get(url, headers=HEADERS)

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if response.status_code != 200:
        raise requests.HTTPError(
            f"Document request failed with HTTP {response.status_code}: {response.text}",
            response=response,
        )
    return response.json()
# Fetch and print the results only when the parsing job completed.
if success:
    doc = get_doc(response["doc_id"])
    print(f"Full text:\n{doc['result']['text']}")

    # Access individual pages and sections
    for page in doc["result"]["pages"]:
        print(f"Page {page['pageNum']}")
        for section in page["sections"]:
            print(f"Section at bounding box {section['bbox']}:\n{section['text']}")
Complete Example
Here's a complete example that puts all these steps together:
import base64
import os
import time

import requests
# Placeholders to fill in before running, plus the endpoint and the headers
# that are sent with every request.
API_KEY = "YOUR_API_KEY"
APP_URL = "https://app.docupanda.io"
DOC_PATH = "/path/to/your/doc"
DATASET_NAME = "YOUR_DATASET_NAME"
HEADERS = {
    "accept": "application/json",
    "content-type": "application/json",
    "X-API-Key": API_KEY,
}
def post_doc():
    """Upload the file at DOC_PATH to DocuPanda and start a parsing job.

    The file is read from disk, base64-encoded, and posted as JSON to the
    ``/document`` endpoint together with its base name and the dataset name.

    Returns:
        dict: ``{"job_id": ..., "doc_id": ...}`` taken from the ``jobId`` and
        ``documentId`` fields of the JSON response.

    Raises:
        OSError: If the file at DOC_PATH cannot be opened or read.
        requests.HTTPError: If the API answers with a status other than 200.
    """
    url = f"{APP_URL}/document"

    # Read inside a context manager so the file handle is closed promptly.
    with open(str(DOC_PATH), "rb") as doc_file:
        encoded_contents = base64.b64encode(doc_file.read()).decode()

    payload = {
        "document": {
            "file": {
                "contents": encoded_contents,
                # basename handles the platform's path separators and also
                # accepts path objects, unlike splitting the string on "/".
                "filename": os.path.basename(DOC_PATH),
            },
        },
        "dataset": DATASET_NAME,
    }
    response = requests.post(url, json=payload, headers=HEADERS)

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if response.status_code != 200:
        raise requests.HTTPError(
            f"Upload failed with HTTP {response.status_code}: {response.text}",
            response=response,
        )

    res_json = response.json()
    return {"job_id": res_json["jobId"], "doc_id": res_json["documentId"]}
def is_job_done(job_id, max_attempts=60, poll_interval=3):
    """Poll a DocuPanda job until it finishes or the attempts run out.

    Args:
        job_id: Identifier of the job, as returned by the upload call.
        max_attempts: Maximum number of status requests to make.
        poll_interval: Seconds to sleep after each inconclusive status check.

    Returns:
        bool: True when the job status is "completed"; False when the status
        is "error" or the job has not finished after ``max_attempts`` polls.

    Raises:
        requests.HTTPError: If a status request returns anything but 200.
        requests.Timeout: If a single status request gets no answer in time.
    """
    url = f"{APP_URL}/job/{job_id}"
    for _ in range(max_attempts):
        # A per-request timeout keeps one stalled connection from defeating
        # the bounded number of attempts.
        response = requests.get(url, headers=HEADERS, timeout=30)

        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if response.status_code != 200:
            raise requests.HTTPError(
                f"Job status request failed with HTTP {response.status_code}: {response.text}",
                response=response,
            )

        status = response.json()["status"]
        if status == "completed":
            return True
        if status == "error":
            return False
        time.sleep(poll_interval)
    return False
def get_doc(doc_id):
    """Fetch a document, including its parsing results, from DocuPanda.

    Args:
        doc_id: Identifier of the document, as returned by the upload call.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with a status other than 200.
    """
    url = f"{APP_URL}/document/{doc_id}"
    response = requests.get(url, headers=HEADERS)

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if response.status_code != 200:
        raise requests.HTTPError(
            f"Document request failed with HTTP {response.status_code}: {response.text}",
            response=response,
        )
    return response.json()
def main():
    """Upload the document, wait for parsing to finish, and print the results.

    Prints the job and document IDs, then the polling outcome. On success the
    full text and every page section are printed; otherwise a failure message
    is printed and nothing is fetched.
    """
    response = post_doc()
    print(f"Job ID: {response['job_id']}")
    print(f"Document ID: {response['doc_id']}")

    success = is_job_done(job_id=response["job_id"])
    print(f"Parsing completed: {success}")

    if not success:
        # The upload itself already succeeded here; a False result means the
        # job reported an error or polling ran out of attempts.
        print(f"Parsing failed or timed out for {DOC_PATH}")
        return

    doc = get_doc(doc_id=response["doc_id"])
    print("Document parsing completed successfully")
    print(f"Full text:\n{doc['result']['text']}")
    for page in doc["result"]["pages"]:
        print(f"Page {page['pageNum']}")
        for section in page["sections"]:
            print(f"Section at bounding box {section['bbox']}:\n{section['text']}")
# Run the workflow only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Remember to replace "YOUR_API_KEY", "/path/to/your/doc", and "YOUR_DATASET_NAME" with your actual values.
This example demonstrates how to use DocuPanda's document parsing feature to extract text and structural information from documents. The parsed results include both the full text of the document and detailed information about the location of text within pages and sections. You can use this structural information for more advanced document analysis or to maintain the original document layout in your applications.