'Example: runs OCR on every page of the input PDF document, writes the recognized
'text of each page into a text file and saves the resulting PDF document
'under a new name. A summary of the per-page results is shown at the end.
Dim caption As String = "Example: OcrPage"
'Collects the per-page results that are shown to the user once processing is finished.
Dim message As New System.Text.StringBuilder()
'Expecting that the input pdf document includes scanned pages.
Const inputPdfPath As String = "test.pdf"
Const outputPdfPath As String = "test_done.pdf"
Const textFile As String = "test_text_after_ocr.txt"
'The path to language-files installed for OCR - please make sure this path is correct according to your installation.
Const pathToOcr As String = "C:\GdPicture.NET 14\Redist\OCR"
'The Using block releases the document on every exit path, including exceptions.
Using gdpicturePDF As New GdPicturePDF()
    If gdpicturePDF.LoadFromFile(inputPdfPath, False) <> GdPictureStatus.OK Then
        MessageBox.Show("The file can't be loaded.", caption)
    Else
        Dim pageCount As Integer = gdpicturePDF.GetPageCount()
        If gdpicturePDF.GetStat() <> GdPictureStatus.OK Then
            MessageBox.Show("The GetPageCount() method has failed with the status: " + gdpicturePDF.GetStat().ToString(), caption)
        ElseIf pageCount = 0 Then
            MessageBox.Show("This input PDF document contains no pages.", caption)
        Else
            'The text file is closed (and flushed) as soon as the last page is handled,
            'so it is complete and unlocked by the time the summary dialog is displayed.
            Using text_file As New System.IO.StreamWriter(textFile)
                For i As Integer = 1 To pageCount
                    Dim pageLabel As String = "The page nr." + i.ToString()
                    If gdpicturePDF.SelectPage(i) <> GdPictureStatus.OK Then
                        message.Append("The SelectPage() method has failed for the page nr." + i.ToString() + " with the status: " + gdpicturePDF.GetStat().ToString() + vbCrLf)
                        text_file.WriteLine(pageLabel + " has not been processed.")
                        Continue For
                    End If
                    Dim hasText As Boolean = gdpicturePDF.PageHasText()
                    If gdpicturePDF.GetStat() <> GdPictureStatus.OK Then
                        message.Append("The PageHasText() method has failed for the page nr." + i.ToString() + " with the status: " + gdpicturePDF.GetStat().ToString() + vbCrLf)
                        text_file.WriteLine(pageLabel + " has not been processed.")
                        Continue For
                    End If
                    'The user is only asked when the page already carries text.
                    If hasText AndAlso (MessageBox.Show(pageLabel + " already has text. Do you want to skip this page?",
                                                        caption, MessageBoxButtons.YesNo, MessageBoxIcon.[Stop]) = System.Windows.Forms.DialogResult.Yes) Then
                        message.Append(pageLabel + " has been skipped." + vbCrLf)
                        text_file.WriteLine(pageLabel + " has been skipped.")
                        Continue For
                    End If
                    If gdpicturePDF.OcrPage("eng", pathToOcr, "", 300) <> GdPictureStatus.OK Then
                        message.Append("The OcrPage() method has failed for the page nr." + i.ToString() + " with the status: " + gdpicturePDF.GetStat().ToString() + vbCrLf)
                        text_file.WriteLine(pageLabel + " has not been processed.")
                        Continue For
                    End If
                    message.Append(pageLabel + " has been successfully processed." + vbCrLf)
                    Dim page_text As String = gdpicturePDF.GetPageText()
                    If gdpicturePDF.GetStat() = GdPictureStatus.OK Then
                        text_file.WriteLine(pageLabel + " contains this text:")
                        text_file.WriteLine(page_text)
                    Else
                        'Report the failure instead of leaving a header with no text behind it.
                        message.Append("The GetPageText() method has failed for the page nr." + i.ToString() + " with the status: " + gdpicturePDF.GetStat().ToString() + vbCrLf)
                        text_file.WriteLine(pageLabel + " has been processed, but its text could not be retrieved.")
                    End If
                Next
            End Using
            If gdpicturePDF.SaveToFile(outputPdfPath, True) = GdPictureStatus.OK Then
                message.Append("The file has been successfully saved.")
            Else
                message.Append("The file can't be saved.")
            End If
            MessageBox.Show(message.ToString(), caption)
        End If
    End If
End Using
// Example: runs OCR on every page of the input PDF document, writes the recognized
// text of each page into a text file and saves the resulting PDF document
// under a new name. A summary of the per-page results is shown at the end.
string caption = "Example: OcrPage";
// Collects the per-page results that are shown to the user once processing is finished.
System.Text.StringBuilder message = new System.Text.StringBuilder();
//Expecting that the input pdf document includes scanned pages.
const string inputPdfPath = "test.pdf";
const string outputPdfPath = "test_done.pdf";
const string textFile = "test_text_after_ocr.txt";
//The path to language-files installed for OCR - please make sure this path is correct according to your installation.
const string pathToOcr = "C:\\GdPicture.NET 14\\Redist\\OCR";
// The using block releases the document on every exit path, including exceptions.
using (GdPicturePDF gdpicturePDF = new GdPicturePDF())
{
    if (gdpicturePDF.LoadFromFile(inputPdfPath, false) != GdPictureStatus.OK)
    {
        MessageBox.Show("The file can't be loaded.", caption);
    }
    else
    {
        int pageCount = gdpicturePDF.GetPageCount();
        if (gdpicturePDF.GetStat() != GdPictureStatus.OK)
        {
            MessageBox.Show("The GetPageCount() method has failed with the status: " + gdpicturePDF.GetStat().ToString(), caption);
        }
        else if (pageCount == 0)
        {
            MessageBox.Show("This input PDF document contains no pages.", caption);
        }
        else
        {
            // The text file is closed (and flushed) as soon as the last page is handled,
            // so it is complete and unlocked by the time the summary dialog is displayed.
            using (System.IO.StreamWriter text_file = new System.IO.StreamWriter(textFile))
            {
                for (int i = 1; i <= pageCount; i++)
                {
                    string pageLabel = "The page nr." + i.ToString();
                    if (gdpicturePDF.SelectPage(i) != GdPictureStatus.OK)
                    {
                        message.Append("The SelectPage() method has failed for the page nr." + i.ToString() + " with the status: " + gdpicturePDF.GetStat().ToString() + "\n");
                        text_file.WriteLine(pageLabel + " has not been processed.");
                        continue;
                    }
                    bool hasText = gdpicturePDF.PageHasText();
                    if (gdpicturePDF.GetStat() != GdPictureStatus.OK)
                    {
                        message.Append("The PageHasText() method has failed for the page nr." + i.ToString() + " with the status: " + gdpicturePDF.GetStat().ToString() + "\n");
                        text_file.WriteLine(pageLabel + " has not been processed.");
                        continue;
                    }
                    // The user is only asked when the page already carries text.
                    if (hasText &&
                        (MessageBox.Show(pageLabel + " already has text. Do you want to skip this page?",
                                         caption, MessageBoxButtons.YesNo, MessageBoxIcon.Stop) == System.Windows.Forms.DialogResult.Yes))
                    {
                        message.Append(pageLabel + " has been skipped.\n");
                        text_file.WriteLine(pageLabel + " has been skipped.");
                        continue;
                    }
                    if (gdpicturePDF.OcrPage("eng", pathToOcr, "", 300) != GdPictureStatus.OK)
                    {
                        message.Append("The OcrPage() method has failed for the page nr." + i.ToString() + " with the status: " + gdpicturePDF.GetStat().ToString() + "\n");
                        text_file.WriteLine(pageLabel + " has not been processed.");
                        continue;
                    }
                    message.Append(pageLabel + " has been successfully processed.\n");
                    string page_text = gdpicturePDF.GetPageText();
                    if (gdpicturePDF.GetStat() == GdPictureStatus.OK)
                    {
                        text_file.WriteLine(pageLabel + " contains this text:");
                        text_file.WriteLine(page_text);
                    }
                    else
                    {
                        // Report the failure instead of leaving a header with no text behind it.
                        message.Append("The GetPageText() method has failed for the page nr." + i.ToString() + " with the status: " + gdpicturePDF.GetStat().ToString() + "\n");
                        text_file.WriteLine(pageLabel + " has been processed, but its text could not be retrieved.");
                    }
                }
            }
            if (gdpicturePDF.SaveToFile(outputPdfPath, true) == GdPictureStatus.OK)
            {
                message.Append("The file has been successfully saved.");
            }
            else
            {
                message.Append("The file can't be saved.");
            }
            MessageBox.Show(message.ToString(), caption);
        }
    }
}