Lab 1
Other sample invoices.
Lab 2
ai-3002-lab2-create-analyzer.py
from dotenv import load_dotenv
import os
import sys
import time
import requests
import json
def main():
    """Create the business card analyzer defined in ``biz-card.json``.

    Reads the analyzer schema from ``biz-card.json`` in the current working
    directory, loads the ``ENDPOINT``, ``KEY`` and ``ANALYZER_NAME`` settings
    from the environment (after ``load_dotenv()``), and passes them to
    ``create_analyzer``.

    Any exception is printed instead of propagated, so the script always
    exits normally.
    """
    # Clear the console
    os.system('cls' if os.name == 'nt' else 'clear')

    try:
        # Get the business card schema. The encoding is explicit so the file
        # is read the same way regardless of the platform's default encoding.
        with open("biz-card.json", "r", encoding="utf-8") as file:
            schema_json = json.load(file)

        # Parsing and re-serializing makes malformed JSON fail here, before
        # anything is sent to the service.
        card_schema = json.dumps(schema_json)

        # Get config settings
        load_dotenv()
        settings = {
            name: os.getenv(name)
            for name in ('ENDPOINT', 'KEY', 'ANALYZER_NAME')
        }

        # Fail early with a clear message; otherwise a missing value would be
        # formatted into the request URL as the text "None".
        missing = [name for name, value in settings.items() if not value]
        if missing:
            raise ValueError(
                f"Missing configuration setting(s): {', '.join(missing)}"
            )

        # Create the analyzer
        create_analyzer(
            card_schema,
            settings['ANALYZER_NAME'],
            settings['ENDPOINT'],
            settings['KEY'],
        )
        print("\n")

    except Exception as ex:
        # Top-level boundary of the script: report the problem and finish.
        print(ex)
def create_analyzer(schema, analyzer, endpoint, key):
    """Create (or replace) a Content Understanding analyzer and wait for it.

    Any existing analyzer with the same name is deleted first, then the
    schema is submitted with a PUT request and the operation URL returned in
    the ``Operation-Location`` header is polled once per second until the
    operation leaves its pending states. Progress and the final outcome are
    printed.

    Args:
        schema: Analyzer definition as a JSON string; sent as the PUT body.
        analyzer: Name of the analyzer, used as the last URL path segment.
        endpoint: Base URL of the service; a trailing ``/`` is tolerated.
        key: Value sent in the ``Ocp-Apim-Subscription-Key`` header.

    Returns:
        None. The outcome is reported on standard output.
    """
    print(f"Creating {analyzer}")

    # Set the API version
    CU_VERSION = "2025-05-01-preview"

    # Seconds allowed for each HTTP call. Without an explicit timeout a
    # stalled connection would block the script indefinitely.
    REQUEST_TIMEOUT = 30

    # Operation states that mean "keep polling". An operation may be
    # reported as not yet started before it is running; treating that as
    # finished would wrongly report a failure.
    PENDING_STATES = ("notstarted", "running")

    headers = {
        "Ocp-Apim-Subscription-Key": key,
        "Content-Type": "application/json",
    }

    # Strip a trailing slash so the URL never contains "//" after the host.
    base_url = str(endpoint).rstrip("/")
    url = f"{base_url}/contentunderstanding/analyzers/{analyzer}?api-version={CU_VERSION}"

    # Delete the analyzer if it already exists
    response = requests.delete(url, headers=headers, timeout=REQUEST_TIMEOUT)
    print(response.status_code)
    time.sleep(1)

    # Now create it
    response = requests.put(
        url, headers=headers, data=schema, timeout=REQUEST_TIMEOUT
    )
    print(response.status_code)

    # The callback URL is only present when the request was accepted; report
    # the service's answer instead of failing on a missing header.
    callback_url = response.headers.get("Operation-Location")
    if not response.ok or not callback_url:
        print("Analyzer creation failed.")
        print(response.text)
        return

    # Poll until the operation is no longer pending
    while True:
        time.sleep(1)
        result_response = requests.get(
            callback_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        status = result_response.json().get("status")
        if str(status).lower() not in PENDING_STATES:
            break

    print(status)
    if str(status).lower() == "succeeded":
        print(f"Analyzer '{analyzer}' created successfully.")
    else:
        print("Analyzer creation failed.")
        print(result_response.json())
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
ai-3002-lab2-read-card.py
from dotenv import load_dotenv
import os
import sys
import time
import requests
import json
def main():
    """Analyze a business card image with the configured analyzer.

    The image path is taken from the first command-line argument and falls
    back to ``biz-card-1.png``. The ``ENDPOINT``, ``KEY`` and
    ``ANALYZER_NAME`` settings are read from the environment (after
    ``load_dotenv()``) and passed to ``analyze_card``.

    Any exception is printed instead of propagated, so the script always
    exits normally.
    """
    # Clear the console
    os.system('cls' if os.name == 'nt' else 'clear')

    try:
        # Get the business card: first CLI argument, or the default sample
        image_file = sys.argv[1] if len(sys.argv) > 1 else 'biz-card-1.png'

        # Get config settings
        load_dotenv()
        settings = {
            name: os.getenv(name)
            for name in ('ENDPOINT', 'KEY', 'ANALYZER_NAME')
        }

        # Fail early with a clear message; otherwise a missing value would be
        # formatted into the request URL as the text "None".
        missing = [name for name, value in settings.items() if not value]
        if missing:
            raise ValueError(
                f"Missing configuration setting(s): {', '.join(missing)}"
            )

        # Analyze the business card
        analyze_card(
            image_file,
            settings['ANALYZER_NAME'],
            settings['ENDPOINT'],
            settings['KEY'],
        )
        print("\n")

    except Exception as ex:
        # Top-level boundary of the script: report the problem and finish.
        print(ex)
def analyze_card(image_file, analyzer, endpoint, key):
    """Submit an image to a Content Understanding analyzer and print fields.

    The image bytes are POSTed to the analyzer's ``:analyze`` route, the
    resulting operation is polled once per second until it leaves its pending
    states, and on success the full JSON response is written to
    ``results.json`` and each extracted field is printed as ``name: value``.

    Args:
        image_file: Path of the image to analyze; read in binary mode.
        analyzer: Name of the analyzer to use.
        endpoint: Base URL of the service; a trailing ``/`` is tolerated.
        key: Value sent in the ``Ocp-Apim-Subscription-Key`` header.

    Returns:
        None. Results are reported on standard output and in
        ``results.json``.
    """
    print(f"Analyzing {image_file}")

    # Set the API version
    CU_VERSION = "2025-05-01-preview"

    # Seconds allowed for each HTTP call. Without an explicit timeout a
    # stalled connection would block the script indefinitely.
    REQUEST_TIMEOUT = 30

    # Operation states that mean "keep polling". An operation may be
    # reported as not yet started before it is running; treating that as
    # finished would end the function without any result.
    PENDING_STATES = ("notstarted", "running")

    # Read the image data
    with open(image_file, "rb") as file:
        image_data = file.read()

    # Use a POST request to submit the image data to the analyzer
    print("Submitting request...")
    headers = {
        "Ocp-Apim-Subscription-Key": key,
        "Content-Type": "application/octet-stream",
    }
    # Strip a trailing slash so the URL never contains "//" after the host.
    base_url = str(endpoint).rstrip("/")
    url = f'{base_url}/contentunderstanding/analyzers/{analyzer}:analyze?api-version={CU_VERSION}'
    response = requests.post(
        url, headers=headers, data=image_data, timeout=REQUEST_TIMEOUT
    )
    print(response.status_code)

    # Extract the ID assigned to the analysis operation. Without one there is
    # nothing to poll, so report the service's answer and stop.
    id_value = response.json().get("id") if response.ok else None
    if not id_value:
        print("Analysis request failed.")
        print(response.text)
        return

    # Use a GET request to check the status of the analysis operation
    print('Getting results...')
    time.sleep(1)
    result_url = f'{base_url}/contentunderstanding/analyzerResults/{id_value}?api-version={CU_VERSION}'
    result_response = requests.get(
        result_url, headers=headers, timeout=REQUEST_TIMEOUT
    )
    print(result_response.status_code)

    # Keep polling until the analysis is complete
    status = result_response.json().get("status")
    while str(status).lower() in PENDING_STATES:
        time.sleep(1)
        result_response = requests.get(
            result_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        status = result_response.json().get("status")

    # Report a non-successful outcome explicitly rather than ending silently.
    if str(status).lower() != "succeeded":
        print(f"Analysis did not succeed (status: {status}).")
        print(result_response.json())
        return

    # Process the analysis results
    print("Analysis succeeded:\n")
    result_json = result_response.json()
    output_file = "results.json"
    with open(output_file, "w", encoding="utf-8") as json_file:
        json.dump(result_json, json_file, indent=4)
    print(f"Response saved in {output_file}\n")

    # Print the names and type-specific values of the extracted fields
    contents = result_json.get("result", {}).get("contents", [])
    _print_fields(contents)


def _print_fields(contents):
    """Print ``name: value`` for every field of a supported type.

    ``contents`` is a list of dicts; entries without a ``fields`` mapping are
    skipped, as are fields whose ``type`` is not listed below. A field that
    carries no value for its type is printed with the value ``None``.
    """
    # Maps a field's declared type to the key that holds its value.
    value_keys = {
        "string": "valueString",
        "number": "valueNumber",
        "integer": "valueInteger",
        "date": "valueDate",
        "time": "valueTime",
        "array": "valueArray",
    }
    for content in contents:
        fields = content.get("fields") or {}
        for field_name, field_data in fields.items():
            value_key = value_keys.get(field_data.get("type"))
            if value_key is not None:
                # .get: the value key may be absent when nothing was extracted
                print(f"{field_name}: {field_data.get(value_key)}")
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Lab 3
Other sample invoices.
from dotenv import load_dotenv
import os
# Add references
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import DocumentAnalysisClient
def main():
    """Analyze a sample invoice with the prebuilt invoice model.

    Reads ``ENDPOINT`` and ``KEY`` from the environment (after
    ``load_dotenv()``), submits the sample invoice URL to the
    ``prebuilt-invoice`` model, and prints the vendor name, customer name and
    invoice total of each returned document together with their confidence.

    Any exception is printed instead of propagated; the closing
    "Analysis complete." line is printed in either case.
    """
    # Clear the console
    os.system('cls' if os.name == 'nt' else 'clear')

    try:
        # Get config settings
        load_dotenv()
        endpoint = os.getenv('ENDPOINT')
        key = os.getenv('KEY')

        # Fail early with a clear message instead of passing None to the SDK.
        missing = [
            name for name, value in (('ENDPOINT', endpoint), ('KEY', key))
            if not value
        ]
        if missing:
            raise ValueError(
                f"Missing configuration setting(s): {', '.join(missing)}"
            )

        # Set analysis settings
        fileUri = "https://github.com/MicrosoftLearning/mslearn-ai-information-extraction/blob/main/Labfiles/prebuilt-doc-intelligence/sample-invoice/sample-invoice.pdf?raw=true"
        fileLocale = "en-US"
        fileModelId = "prebuilt-invoice"

        print(f"\nConnecting to Forms Recognizer at: {endpoint}")
        print(f"Analyzing invoice at: {fileUri}")

        # Create the client (once; it is only needed to start the analysis)
        document_analysis_client = DocumentAnalysisClient(
            endpoint=endpoint, credential=AzureKeyCredential(key)
        )

        # Analyse the invoice
        poller = document_analysis_client.begin_analyze_document_from_url(
            fileModelId, fileUri, locale=fileLocale
        )

        # Display invoice information to the user
        invoices = poller.result()

        for invoice in invoices.documents:
            vendor_name = invoice.fields.get("VendorName")
            if vendor_name:
                print(f"\nVendor Name: {vendor_name.value}, with confidence {vendor_name.confidence}.")

            customer_name = invoice.fields.get("CustomerName")
            if customer_name:
                print(f"Customer Name: '{customer_name.value}', with confidence {customer_name.confidence}.")

            invoice_total = invoice.fields.get("InvoiceTotal")
            if invoice_total:
                total = invoice_total.value
                # The currency symbol is optional; avoid printing "None".
                symbol = total.symbol or ""
                print(f"Invoice Total: '{symbol}{total.amount}', with confidence {invoice_total.confidence}.")

    except Exception as ex:
        # Top-level boundary of the script: report the problem and carry on
        # to the closing message.
        print(ex)

    print("\nAnalysis complete.\n")
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
