PDF/A
PDF/A is a document format intended for long-term preservation. The PDF/A conversion API supports converting source files into all PDF/A versions and conformance levels:
- PDF/A-1a, PDF/A-1b
- PDF/A-2a, PDF/A-2u, PDF/A-2b
- PDF/A-3a, PDF/A-3u, PDF/A-3b
- PDF/A-4, PDF/A-4e, PDF/A-4f
For more information on the long-term preservation of documents, check out our demo video, or have a look at our complete guide to PDF/A.
Configuring PDF/A Conversion
PDF/A documents are intended for long-term preservation, and their structure differs from that of regular PDF documents. To ensure compliance with your chosen conformance level, the conversion process may change the document’s content or appearance — for example, by adding, editing, or removing document structure elements, or by embedding fonts.
In some cases, direct conversion isn’t possible. The PDF/A conversion API then falls back to other techniques, such as vectorization and rasterization:
- Vectorization means that if some document elements cannot be used directly in the PDF/A output, they’re embedded in the output document as vector-based graphic elements. This technique is typically used for fonts and paths.
- Rasterization means that if some document content cannot be used directly in the PDF/A output, it’s embedded in the output document as raster images.
Both approaches result in the loss of fonts and text information because the text is converted into shapes and raster images. Text information can later be recovered using optical character recognition (OCR).
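To control whether vectorization or rasterization may be used during conversion, set the vectorization and rasterization options in the output configuration. The following examples enable both techniques by setting the options to true.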
# Convert document.pdf to PDF/A-2a through the Build API and save the result
# as result.pdf. The uploaded file is sent as the multipart field "document",
# which is the name the "file" entry in the instructions refers to.
# --fail makes curl exit with an error instead of saving an HTTP error body.
curl -X POST https://api.nutrient.io/build \
-H "Authorization: Bearer your_api_key_here" \
-o result.pdf \
--fail \
-F document=@document.pdf \
-F instructions='{
"parts": [
{
"file": "document"
}
],
"output": {
"type": "pdfa",
"conformance": "pdfa-2a",
"vectorization": true,
"rasterization": true
}
}'
REM Convert document.pdf to PDF/A-2a through the Build API and save the result as result.pdf.
REM The uploaded file is sent as the multipart field "document", which the instructions reference.
curl -X POST https://api.nutrient.io/build ^
-H "Authorization: Bearer your_api_key_here" ^
-o result.pdf ^
--fail ^
-F document=@document.pdf ^
-F instructions="{\"parts\": [{\"file\": \"document\"}], \"output\": {\"type\": \"pdfa\", \"conformance\": \"pdfa-2a\", \"vectorization\": true, \"rasterization\": true}}"
package com.example.pspdfkit;
import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import org.json.JSONArray;
import org.json.JSONObject;
import okhttp3.MediaType;
import okhttp3.MultipartBody;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
/**
 * Uploads {@code document.pdf} to the Build endpoint with PDF/A-2a output
 * instructions (vectorization and rasterization enabled) and stores the
 * converted file as {@code result.pdf}.
 */
public final class PspdfkitApiExample {
  /**
   * Runs the conversion request.
   *
   * @param args unused
   * @throws IOException if the request fails, the result cannot be written,
   *     or the server answers with a non-successful status (the response body
   *     is used as the exception message)
   */
  public static void main(final String[] args) throws IOException {
    // Multipart body: the file under the name "document" plus the JSON
    // instructions that reference that name.
    final RequestBody body = new MultipartBody.Builder()
      .setType(MultipartBody.FORM)
      .addFormDataPart(
        "document",
        "document.pdf",
        RequestBody.create(
          MediaType.parse("application/pdf"),
          new File("document.pdf")
        )
      )
      .addFormDataPart(
        "instructions",
        new JSONObject()
          .put("parts", new JSONArray()
            .put(new JSONObject()
              .put("file", "document")
            )
          )
          .put("output", new JSONObject()
            .put("type", "pdfa")
            .put("conformance", "pdfa-2a")
            .put("vectorization", true)
            .put("rasterization", true)
          ).toString()
      )
      .build();

    final Request request = new Request.Builder()
      .url("https://api.nutrient.io/build")
      .method("POST", body)
      .addHeader("Authorization", "Bearer your_api_key_here")
      .build();

    final OkHttpClient client = new OkHttpClient()
      .newBuilder()
      .build();

    // The response holds the underlying connection until it is closed, so
    // close it on every path (success, error status, or exception).
    try (Response response = client.newCall(request).execute()) {
      if (response.isSuccessful()) {
        Files.copy(
          response.body().byteStream(),
          FileSystems.getDefault().getPath("result.pdf"),
          StandardCopyOption.REPLACE_EXISTING
        );
      } else {
        // Handle the error
        throw new IOException(response.body().string());
      }
    }
  }
}
using System;
using System.IO;
using System.Net;
using RestSharp;
namespace PspdfkitApiDemo
{
    /// <summary>
    /// Sends <c>document.pdf</c> to the Build endpoint with PDF/A-2a output
    /// instructions and saves the converted document as <c>result.pdf</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds the multipart request, executes it, and either writes the
        /// returned document to disk or prints the error body to the console.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            var client = new RestClient("https://api.nutrient.io/build");

            // The file is uploaded under the name "document", which is the
            // name the "file" entry in the instructions refers to.
            var request = new RestRequest(Method.POST)
                .AddHeader("Authorization", "Bearer your_api_key_here")
                .AddFile("document", "document.pdf")
                .AddParameter("instructions", new JsonObject
                {
                    ["parts"] = new JsonArray
                    {
                        new JsonObject
                        {
                            ["file"] = "document"
                        }
                    },
                    ["output"] = new JsonObject
                    {
                        ["type"] = "pdfa",
                        ["conformance"] = "pdfa-2a",
                        ["vectorization"] = true,
                        ["rasterization"] = true
                    }
                }.ToString());

            request.AdvancedResponseWriter = (responseStream, response) =>
            {
                // Dispose the response stream on both the success and error paths.
                using (responseStream)
                {
                    if (response.StatusCode == HttpStatusCode.OK)
                    {
                        // File.Create truncates an existing result.pdf. File.OpenWrite
                        // would leave stale trailing bytes behind when the new
                        // document is shorter than the old one.
                        using var outputFileWriter = File.Create("result.pdf");
                        responseStream.CopyTo(outputFileWriter);
                    }
                    else
                    {
                        using var responseStreamReader = new StreamReader(responseStream);
                        Console.Write(responseStreamReader.ReadToEnd());
                    }
                }
            };

            client.Execute(request);
        }
    }
}
// This code requires Node.js. Do not run this code directly in a web browser.
const axios = require('axios')
const FormData = require('form-data')
const fs = require('fs')
// Multipart body: the JSON instructions plus the file they refer to by the
// field name "document".
const formData = new FormData()
formData.append('instructions', JSON.stringify({
  parts: [
    {
      file: "document"
    }
  ],
  output: {
    type: "pdfa",
    conformance: "pdfa-2a",
    vectorization: true,
    rasterization: true
  }
}))
formData.append('document', fs.createReadStream('document.pdf'))

// Send the request and stream the converted document to result.pdf.
;(async () => {
  try {
    const response = await axios.post('https://api.nutrient.io/build', formData, {
      headers: formData.getHeaders({
        'Authorization': 'Bearer your_api_key_here'
      }),
      responseType: "stream"
    })

    response.data.pipe(fs.createWriteStream("result.pdf"))
  } catch (e) {
    // Only errors caused by an HTTP error status carry a response body.
    // Network or request-setup failures have no e.response, so reading
    // e.response.data unconditionally would throw inside this handler.
    if (e.response && e.response.data) {
      const errorString = await streamToString(e.response.data)
      console.log(errorString)
    } else {
      console.log(e.message)
    }
  }
})()
/**
 * Reads a stream to completion and returns its content as a UTF-8 string.
 *
 * @param stream Emitter of "data", "error" and "end" events.
 * @returns A promise that resolves with the concatenated text once "end"
 *   fires and rejects with the emitted error if "error" fires.
 */
function streamToString(stream) {
  return new Promise((resolve, reject) => {
    const parts = []

    const collect = (piece) => {
      parts.push(Buffer.from(piece))
    }
    const finish = () => {
      const joined = Buffer.concat(parts)
      resolve(joined.toString("utf8"))
    }

    stream.on("data", collect)
    stream.on("error", reject)
    stream.on("end", finish)
  })
}
import requests
import json
# Conversion instructions: take the uploaded part named "document" and emit
# PDF/A-2a, allowing both vectorization and rasterization.
# Python booleans are True/False; json.dumps serializes them as true/false.
instructions = {
    'parts': [
        {
            'file': 'document'
        }
    ],
    'output': {
        'type': 'pdfa',
        'conformance': 'pdfa-2a',
        'vectorization': True,
        'rasterization': True
    }
}

# Open the upload in a context manager so the file handle is always closed.
with open('document.pdf', 'rb') as document:
    response = requests.request(
        'POST',
        'https://api.nutrient.io/build',
        headers = {
            'Authorization': 'Bearer your_api_key_here'
        },
        files = {
            'document': document
        },
        data = {
            'instructions': json.dumps(instructions)
        },
        stream = True
    )

if response.ok:
    # Stream the converted document to disk in chunks.
    with open('result.pdf', 'wb') as fd:
        for chunk in response.iter_content(chunk_size=8096):
            fd.write(chunk)
else:
    # On an error status, print the response body and stop.
    print(response.text)
    exit()
<?php
// Sends document.pdf together with PDF/A conversion instructions to the Build
// endpoint and hands cURL a handle to result.pdf as the output target.
// 'w+' opens result.pdf for reading and writing and truncates existing content.
$FileHandle = fopen('result.pdf', 'w+');
$curl = curl_init();
curl_setopt_array($curl, array(
CURLOPT_URL => 'https://api.nutrient.io/build',
CURLOPT_CUSTOMREQUEST => 'POST',
// NOTE(review): CURLOPT_FILE is also set further down in this array. The two
// options interact and their relative order appears to matter — keep
// RETURNTRANSFER before FILE and confirm the body ends up in result.pdf.
CURLOPT_RETURNTRANSFER => true,
// NOTE(review): an empty string presumably lets cURL accept every encoding it
// supports — confirm against the cURL option reference.
CURLOPT_ENCODING => '',
// Request fields: the JSON instructions and the file they reference.
CURLOPT_POSTFIELDS => array(
'instructions' => '{
"parts": [
{
"file": "document"
}
],
"output": {
"type": "pdfa",
"conformance": "pdfa-2a",
"vectorization": true,
"rasterization": true
}
}',
// Uploaded under the field name "document", matching "file": "document" above.
'document' => new CURLFILE('document.pdf')
),
CURLOPT_HTTPHEADER => array(
'Authorization: Bearer your_api_key_here'
),
// Output target for the transfer: the result.pdf handle opened above.
CURLOPT_FILE => $FileHandle,
));
// The return value is stored but never inspected, so a failed request is not
// reported by this script.
$response = curl_exec($curl);
curl_close($curl);
fclose($FileHandle);
POST https://api.nutrient.io/build HTTP/1.1
Content-Type: multipart/form-data; boundary=customboundary
Authorization: Bearer your_api_key_here
--customboundary
Content-Disposition: form-data; name="instructions"
Content-Type: application/json
{
"parts": [
{
"file": "document"
}
],
"output": {
"type": "pdfa",
"conformance": "pdfa-2a",
"vectorization": true,
"rasterization": true
}
}
--customboundary
Content-Disposition: form-data; name="document"; filename="document.pdf"
Content-Type: application/pdf
(document data)
--customboundary--