Convert images to text on Android
Nutrient Android SDK supports extracting textual information from a scanned PDF. To do so, follow the steps below:
- First, convert the image into a PDF file as described in the image-to-PDF conversion guide.
- Next, perform OCR on the PDF file so that its textual information can be extracted. This process is described in detail in the guide on converting a scan into a searchable PDF.
- After performing OCR on the document, retrieve the text, text blocks, words, or glyphs from the pages. Here’s a detailed guide explaining how to do that.
This entire process is explained with the sample code below:
// Kotlin sample: convert an image to a single-page PDF, run OCR on that PDF,
// and then read the recognized text back from the processed document.
// `context` and `ocrDisposable` are not declared here; they are expected to be
// provided by the surrounding code.

// Convert the image to a PDF file.
val image: Bitmap = ...
val outputFile: File = ... // Writable file.
// The new page is created with the same dimensions as the image.
val imageSize = Size(image.width.toFloat(), image.height.toFloat())
val pageImage = PageImage(image, PagePosition.CENTER).apply {
    setJpegQuality(70)
}
val newPage = NewPage.emptyPage(imageSize).withPageItem(pageImage).build()
val creationTask = PdfProcessorTask.newPage(newPage)
val disposable = PdfProcessor.processDocumentAsync(creationTask, outputFile)
    .subscribe(
        { progress -> }, // onNext
        { throwable ->
            // onError: report the failure instead of silently dropping it.
            Log.e("PSPDFKit OCR", "Failed to convert the image to a PDF file.", throwable)
        },
        {
            // `onComplete`: the PDF now exists, so OCR can be performed on it.
            // `Uri.fromFile` produces a `file://` URI for the written file.
            val document = PdfDocumentLoader.openDocument(context, Uri.fromFile(outputFile))
            // Run OCR on every page of the document.
            val ocrTask = PdfProcessorTask
                .fromDocument(document)
                .performOcrOnPages((0 until document.pageCount).toSet(), OcrLanguage.ENGLISH)
            val ocrFile: File = ... // Writable file.
            ocrDisposable = PdfProcessor.processDocumentAsync(ocrTask, ocrFile)
                .subscribe(
                    { progress -> }, // onNext
                    { throwable ->
                        // onError: report the failure instead of silently dropping it.
                        Log.e("PSPDFKit OCR", "Failed to perform OCR on the PDF file.", throwable)
                    },
                    {
                        // `onComplete`: open the OCR result.
                        val ocrDocument = PdfDocumentLoader.openDocument(context, Uri.fromFile(ocrFile))
                        // Retrieve text from the document (page index 0).
                        val pageText = ocrDocument.getPageText(0)
                        Log.d("PSPDFKit OCR", pageText)
                    }
                )
        }
    )
// Convert the image to a PDF file. final Bitmap image = ... final File outputFile = ... // Writable file. final Size imageSize = new Size(image.getWidth(), image.getHeight()); final PageImage pageImage = new PageImage(image, PagePosition.CENTER); pageImage.setJpegQuality(70); final NewPage newPage = NewPage.emptyPage(imageSize).withPageItem(pageImage).build(); final PdfProcessorTask creationTask = PdfProcessorTask.newPage(newPage); final Disposable disposable = PdfProcessor.processDocumentAsync(creationTask, outputFile) .subscribe( progress -> { }, // onNext throwable -> { }, // onError () -> { // Perform OCR on the file. final PdfDocument document = PdfDocumentLoader .openDocument(context, Uri.parse(outputFile.getAbsolutePath())); final Set<Integer> pages = new HashSet(); for (int i = 0; i < document.getPageCount(); i++) { pages.add(i); } final PdfProcessorTask ocrTask = PdfProcessorTask .fromDocument(document) .performOcrOnPages(pages, OcrLanguage.ENGLISH); final File ocrFile = ... // Writable file. ocrDisposable = PdfProcessor.processDocumentAsync(ocrTask, ocrFile) .subscribe( progress -> { }, // onNext throwable -> { }, // onError () -> { // `onComplete` final PdfDocument ocrDocument = PdfDocumentLoader .openDocument(context, Uri.parse(ocrFile.getAbsolutePath())); // Retrieve text from the document. final String pageText = ocrDocument.getPageText(0); Log.d("PSPDFKit OCR", pageText); } ); } );