Convert images to text on Android

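Nutrient Android SDK supports extracting textual information from a scanned PDF, so to get the text out of an image, the image is first converted to a PDF. To do so, follow the steps below:

1. Convert the image to a PDF file.
2. Perform OCR on the resulting PDF file.
3. Retrieve the text from the OCR-processed document.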

The sample code below demonstrates this entire process:

// Converts an image to a single-page PDF, runs OCR on that PDF, and then reads the recognized text.

// Step 1: Convert the image to a PDF file.
val image: Bitmap = ...
val outputFile: File = ... // Writable file.
// The new page uses the bitmap's dimensions, and the image is centered on it.
val imageSize = Size(image.width.toFloat(), image.height.toFloat())
val pageImage = PageImage(image, PagePosition.CENTER).apply { setJpegQuality(70) }
val newPage = NewPage.emptyPage(imageSize).withPageItem(pageImage).build()
val creationTask = PdfProcessorTask.newPage(newPage)
val disposable = PdfProcessor.processDocumentAsync(creationTask, outputFile)
    .subscribe(
        { progress -> }, // `onNext`: progress updates are ignored in this sample.
        { throwable ->
            // `onError`: surface the failure instead of silently dropping it.
            Log.e("PSPDFKit OCR", "Failed to convert the image to a PDF.", throwable)
        },
        {
            // `onComplete`
            // Step 2: Perform OCR on the file.
            // `Uri.fromFile` yields a `file://` URI; parsing the bare absolute path
            // would produce a URI without a scheme.
            val document = PdfDocumentLoader.openDocument(context, Uri.fromFile(outputFile))
            val ocrTask = PdfProcessorTask
                .fromDocument(document)
                .performOcrOnPages((0 until document.pageCount).toSet(), OcrLanguage.ENGLISH)
            val ocrFile: File = ... // Writable file.
            // NOTE(review): `ocrDisposable` is not declared in this snippet — presumably a
            // property of the enclosing class so the OCR work can be disposed of; confirm.
            ocrDisposable = PdfProcessor.processDocumentAsync(ocrTask, ocrFile)
                .subscribe(
                    { progress -> }, // `onNext`: progress updates are ignored in this sample.
                    { throwable ->
                        // `onError`: surface the failure instead of silently dropping it.
                        Log.e("PSPDFKit OCR", "Failed to perform OCR on the PDF.", throwable)
                    },
                    {
                        // `onComplete`
                        val ocrDocument = PdfDocumentLoader.openDocument(context, Uri.fromFile(ocrFile))
                        // Step 3: Retrieve text from the document (first page only).
                        val pageText = ocrDocument.getPageText(0)
                        Log.d("PSPDFKit OCR", pageText)
                    },
                )
        },
    )