Do You Have Sample Code for KVP Extraction Functionality?
Document Converter Services version 11.0 features a key-value pair (KVP) extraction engine. The sample code below shows how the engine is set up to extract the invoice number and the grand total from a document:
/// <summary>
/// Sends a document to the conversion service for key-value pair extraction and
/// writes the returned data to a file in the current working directory.
/// </summary>
/// <param name="args">
/// Optional. When present, the first element is the path of the document to process;
/// otherwise the first PDF file found in the current folder is used.
/// </param>
static void Main(string[] args)
{
    string expectedKeys = @"[ { 'expectedKey': 'grand total', 'synonyms': ['total'] }, { 'expectedKey': 'invoice number', 'synonyms': ['invoice no'] } ]";
    DocumentConverterServiceClient client = null;

    try
    {
        // ** Delete any processed files from a previous run.
        // ** Note that this removes every *.json file in the current folder.
        foreach (FileInfo f in new DirectoryInfo(".").GetFiles("*.json"))
        {
            f.Delete();
        }

        // ** Determine the source file and read it into a byte array.
        string sourceFileName = null;
        if (args.Length == 0)
        {
            // ** If nothing is specified, then read the first PDF file from the current folder.
            string[] sourceFiles = Directory.GetFiles(Directory.GetCurrentDirectory(), "*.pdf");
            if (sourceFiles.Length > 0)
            {
                sourceFileName = sourceFiles[0];
            }
            else
            {
                Console.WriteLine("Please specify a document to extract data from.");
                Console.ReadKey();
                return;
            }
        }
        else
        {
            sourceFileName = args[0];
        }

        byte[] sourceFile = File.ReadAllBytes(sourceFileName);

        // ** Open the service and configure the bindings.
        client = OpenService(SERVICE_URL);

        // ** Set the absolute minimum open options.
        OpenOptions openOptions = new OpenOptions();
        openOptions.OriginalFileName = Path.GetFileName(sourceFileName);
        openOptions.FileExtension = Path.GetExtension(sourceFileName);

        // ** Set KVP settings.
        KVPSettings kvpSettings = new KVPSettings();
        kvpSettings.DPI = 300;
        kvpSettings.KVPFormat = KVPOutputFormat.JSON;
        kvpSettings.OCRLanguage = "eng";
        kvpSettings.IncludePageNumber = BooleanEnum.False;
        kvpSettings.IncludeType = BooleanEnum.False;
        kvpSettings.IncludeKeyBoundingBox = BooleanEnum.False;
        kvpSettings.IncludeValueBoundingBox = BooleanEnum.False;
        if (!string.IsNullOrEmpty(expectedKeys))
        {
            kvpSettings.ExpectedKeys = expectedKeys;
        }

        // ** Carry out the operation.
        byte[] result = client.ExtractKeyValuePairs(sourceFile, openOptions, kvpSettings);

        if (result != null)
        {
            // ** The output file is named after the source file, with the output format as its extension.
            string destinationFileName = Path.GetFileNameWithoutExtension(sourceFileName) + "." + kvpSettings.KVPFormat.ToString();

            // ** Creates the file (or overwrites an existing one) and releases the handle when done.
            File.WriteAllBytes(destinationFileName, result);
            Console.WriteLine("Result saved to " + destinationFileName);
        }
        else
        {
            Console.WriteLine("Nothing returned.");
        }
    }
    catch (FaultException<WebServiceFaultException> ex)
    {
        Console.WriteLine("FaultException occurred: ExceptionType: " + ex.Detail.ExceptionType.ToString());
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
    }
    finally
    {
        CloseService(client);
    }

    Console.ReadKey();
}

/// <summary>
/// Configure the bindings and endpoints and open the service using the specified address.
/// </summary>
/// <param name="address">The URL of the web service endpoint.</param>
/// <returns>An instance of the web service.</returns>
public static DocumentConverterServiceClient OpenService(string address)
{
    DocumentConverterServiceClient client = null;

    try
    {
        BasicHttpBinding binding = new BasicHttpBinding();

        // ** Use standard Windows Security.
        binding.Security.Mode = BasicHttpSecurityMode.TransportCredentialOnly;
        binding.Security.Transport.ClientCredentialType = HttpClientCredentialType.Windows;

        // ** Increase the client timeout to deal with (very) long running requests.
        binding.SendTimeout = TimeSpan.FromMinutes(120);
        binding.ReceiveTimeout = TimeSpan.FromMinutes(120);

        // ** Set the maximum document size to 50MB
        binding.MaxReceivedMessageSize = 50 * 1024 * 1024;
        binding.ReaderQuotas.MaxArrayLength = 50 * 1024 * 1024;
        binding.ReaderQuotas.MaxStringContentLength = 50 * 1024 * 1024;

        // ** Specify an identity (any identity) to get past .net3.5 sp1.
        EndpointIdentity epi = EndpointIdentity.CreateUpnIdentity("unknown");
        EndpointAddress epa = new EndpointAddress(new Uri(address), epi);

        client = new DocumentConverterServiceClient(binding, epa);
        client.Open();
        return client;
    }
    catch (Exception)
    {
        // ** Release whatever was set up before the failure, then let the caller see the original error.
        CloseService(client);
        throw;
    }
}

/// <summary>
/// Release the client: close it gracefully when it is open, abort it when it is faulted
/// or when the graceful close fails.
/// </summary>
/// <param name="client">The client to close. May be null, in which case nothing happens.</param>
public static void CloseService(DocumentConverterServiceClient client)
{
    if (client == null)
    {
        return;
    }

    if (client.State == CommunicationState.Faulted)
    {
        // ** A faulted client cannot be closed gracefully; Abort releases its resources instead.
        client.Abort();
        return;
    }

    if (client.State != CommunicationState.Opened)
    {
        return;
    }

    try
    {
        client.Close();
    }
    catch (CommunicationException)
    {
        // ** Close failed; abort so the channel is released. Not rethrown because this method
        // ** runs from finally / catch blocks, where a new exception would hide the original outcome.
        client.Abort();
    }
    catch (TimeoutException)
    {
        client.Abort();
    }
}