Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<Solution>
<!-- Solution containing the single sample project that applies a confidence threshold during data extraction. -->
<Project Path="Apply-Confidence-Threshold-to-Extract-data/Apply-Confidence-Threshold-to-Extract-data.csproj" />
</Solution>
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Console executable targeting net10.0, with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<RootNamespace>Apply_Confidence_Threshold_to_Extract_data</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<!-- Floating version ("*"): the package version is chosen by NuGet at restore time. -->
<!-- NOTE(review): consider pinning an explicit version so builds are reproducible. -->
<PackageReference Include="Syncfusion.SmartDataExtractor.Net.Core" Version="*" />
</ItemGroup>

<!-- The sample input PDF and the Output folder placeholder are copied to the build output directory on every build. -->
<ItemGroup>
<None Update="Data\Input.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Output\.gitkeep">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using System.IO;
using Syncfusion.Pdf.Parsing;
using Syncfusion.SmartDataExtractor;

namespace ApplyConfidenceThresholdToExtractData
{
    /// <summary>
    /// Sample that extracts data from <c>Data/Input.pdf</c> using a custom
    /// confidence threshold and writes the result to <c>Output/Output.pdf</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: loads the input PDF, runs the extractor and saves the result.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Build paths with Path.Combine so the sample also runs on Linux and macOS,
            // where '\' is not a directory separator.
            string inputPath = Path.Combine("Data", "Input.pdf");
            string outputDirectory = "Output";
            string outputPath = Path.Combine(outputDirectory, "Output.pdf");

            // Make sure the output folder exists before saving (no-op when it already does).
            Directory.CreateDirectory(outputDirectory);

            // Load the input PDF file.
            using (FileStream stream = new FileStream(inputPath, FileMode.Open, FileAccess.Read))
            {
                // Initialize the Smart Data Extractor.
                DataExtractor extractor = new DataExtractor();

                // Apply confidence threshold to extract the data.
                // Only elements with confidence >= 0.75 will be included in the results.
                // Default confidence threshold value is 0.6.
                extractor.ConfidenceThreshold = 0.75;

                // Extract data and return as a loaded PDF document.
                PdfLoadedDocument pdf = extractor.ExtractDataAsPdfDocument(stream);
                try
                {
                    // Save the extracted output as a new PDF file.
                    pdf.Save(outputPath);
                }
                finally
                {
                    // Close the document even when saving fails, so its resources are released.
                    pdf.Close(true);
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<Solution>
<!-- Solution containing the single sample project that configures form recognizer options. -->
<Project Path="Different-form-recognizer-options/Different-form-recognizer-options.csproj" />
</Solution>
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Console executable targeting net10.0, with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<RootNamespace>Different_form_recognizer_options</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<!-- Floating version ("*"): the package version is chosen by NuGet at restore time. -->
<!-- NOTE(review): consider pinning an explicit version so builds are reproducible. -->
<PackageReference Include="Syncfusion.SmartDataExtractor.Net.Core" Version="*" />
</ItemGroup>

<!-- The sample input PDF and the Output folder placeholder are copied to the build output directory on every build. -->
<ItemGroup>
<None Update="Data\Input.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Output\.gitkeep">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
using System.IO;
using Syncfusion.Pdf.Parsing;
using Syncfusion.SmartDataExtractor;
using Syncfusion.SmartFormRecognizer;

namespace DifferentFormRecognizerOptions
{
    /// <summary>
    /// Sample that enables form detection with custom form recognition options,
    /// extracts data from <c>Data/Input.pdf</c> and writes <c>Output/Output.pdf</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: configures the extractor, runs it on the input PDF and saves the result.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Path.Combine picks the platform's directory separator; a literal '\'
            // only works on Windows.
            string inputPath = Path.Combine("Data", "Input.pdf");
            string outputFolder = "Output";
            string outputPath = Path.Combine(outputFolder, "Output.pdf");

            // Saving fails if the target folder is missing, so create it up front.
            Directory.CreateDirectory(outputFolder);

            // Open the input PDF file as a stream.
            using (FileStream stream = new FileStream(inputPath, FileMode.Open, FileAccess.Read))
            {
                // Initialize the Smart Data Extractor.
                DataExtractor extractor = new DataExtractor();

                // Enable form detection in the document to identify form fields.
                extractor.EnableFormDetection = true;

                // Configure form recognition options for advanced detection.
                FormRecognizeOptions formOptions = new FormRecognizeOptions
                {
                    // Recognize forms across pages 1 to 5 in the document.
                    PageRange = new int[,] { { 1, 5 } },

                    // Set confidence threshold for form recognition to filter results.
                    ConfidenceThreshold = 0.6,

                    // Enable detection of signatures, textboxes, checkboxes and radio buttons.
                    DetectSignatures = true,
                    DetectTextboxes = true,
                    DetectCheckboxes = true,
                    DetectRadioButtons = true
                };

                // Assign the configured form recognition options to the extractor.
                extractor.FormRecognizeOptions = formOptions;

                // Extract form data and return as a loaded PDF document.
                PdfLoadedDocument pdf = extractor.ExtractDataAsPdfDocument(stream);
                try
                {
                    // Save the extracted output as a new PDF file.
                    pdf.Save(outputPath);
                }
                finally
                {
                    // Always close the document, including when Save throws.
                    pdf.Close(true);
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<Solution>
<!-- Solution containing the single sample project that configures table extraction options. -->
<Project Path="Different-table-extraction-options/Different-table-extraction-options.csproj" />
</Solution>
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Console executable targeting net10.0, with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<RootNamespace>Different_table_extraction_options</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<!-- Floating version ("*"): the package version is chosen by NuGet at restore time. -->
<!-- NOTE(review): consider pinning an explicit version so builds are reproducible. -->
<PackageReference Include="Syncfusion.SmartDataExtractor.Net.Core" Version="*" />
</ItemGroup>

<!-- The sample input PDF and the Output folder placeholder are copied to the build output directory on every build. -->
<ItemGroup>
<None Update="Data\Input.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Output\.gitkeep">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
using System.IO;
using Syncfusion.Pdf.Parsing;
using Syncfusion.SmartDataExtractor;
using Syncfusion.SmartTableExtractor;

namespace DifferentTableExtractionOptions
{
    /// <summary>
    /// Sample that enables table detection with custom table extraction options,
    /// extracts data from <c>Data/Input.pdf</c> and writes <c>Output/Output.pdf</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: configures the extractor, runs it on the input PDF and saves the result.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Use Path.Combine instead of a hard-coded '\' so the paths resolve
            // on non-Windows platforms too.
            string inputPath = Path.Combine("Data", "Input.pdf");
            string outputDir = "Output";
            string outputPath = Path.Combine(outputDir, "Output.pdf");

            // Ensure the destination folder exists before the document is saved.
            Directory.CreateDirectory(outputDir);

            // Load the input PDF file.
            using (FileStream stream = new FileStream(inputPath, FileMode.Open, FileAccess.Read))
            {
                // Initialize the Smart Data Extractor.
                DataExtractor extractor = new DataExtractor();

                // Enable table detection.
                extractor.EnableTableDetection = true;

                // Configure table extraction options.
                TableExtractionOptions tableOptions = new TableExtractionOptions();

                // Extract tables across pages 1 to 5.
                tableOptions.PageRange = new int[,] { { 1, 5 } };

                // Set confidence threshold for table extraction.
                tableOptions.ConfidenceThreshold = 0.6;

                // Enable detection of borderless tables.
                tableOptions.DetectBorderlessTables = true;

                // Assign the table extraction options to the extractor.
                extractor.TableExtractionOptions = tableOptions;

                // Extract data and return as a loaded PDF document.
                PdfLoadedDocument pdf = extractor.ExtractDataAsPdfDocument(stream);
                try
                {
                    // Save the extracted output as a new PDF file.
                    pdf.Save(outputPath);
                }
                finally
                {
                    // Release the document's resources whether or not the save succeeded.
                    pdf.Close(true);
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<Solution>
<!-- Solution containing the single sample project that disables form detection. -->
<Project Path="Disable-Form-detection/Disable-Form-detection.csproj" />
</Solution>
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Console executable targeting net10.0, with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<RootNamespace>Disable_Form_detection</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<!-- Floating version ("*"): the package version is chosen by NuGet at restore time. -->
<!-- NOTE(review): consider pinning an explicit version so builds are reproducible. -->
<PackageReference Include="Syncfusion.SmartDataExtractor.Net.Core" Version="*" />
</ItemGroup>

<!-- The sample input PDF and the Output folder placeholder are copied to the build output directory on every build. -->
<ItemGroup>
<None Update="Data\Input.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Output\.gitkeep">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using System.IO;
using Syncfusion.Pdf.Parsing;
using Syncfusion.SmartDataExtractor;
using Syncfusion.SmartFormRecognizer;

namespace FormDetection
{
    /// <summary>
    /// Sample that turns form detection off, extracts data from
    /// <c>Data/Input.pdf</c> and writes the result to <c>Output/Output.pdf</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: disables form detection, runs the extractor and saves the result.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // A literal '\' separator only resolves on Windows; Path.Combine keeps
            // the sample portable.
            string sourcePdf = Path.Combine("Data", "Input.pdf");
            string resultFolder = "Output";
            string resultPdf = Path.Combine(resultFolder, "Output.pdf");

            // Create the output folder if it is not there yet.
            Directory.CreateDirectory(resultFolder);

            // Open the input PDF file as a stream.
            using (FileStream stream = new FileStream(sourcePdf, FileMode.Open, FileAccess.Read))
            {
                // Initialize the Smart Data Extractor.
                DataExtractor extractor = new DataExtractor();

                // Disable form detection for this extraction.
                // By default this property is true.
                extractor.EnableFormDetection = false;

                // Extract data and return as a loaded PDF document.
                PdfLoadedDocument pdf = extractor.ExtractDataAsPdfDocument(stream);
                try
                {
                    // Save the extracted output as a new PDF file.
                    pdf.Save(resultPdf);
                }
                finally
                {
                    // Close the document in all cases so a failed save does not leak it.
                    pdf.Close(true);
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<Solution>
<!-- Solution containing the single sample project that disables table detection. -->
<Project Path="Disable-Table-detection/Disable-Table-detection.csproj" />
</Solution>
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Console executable targeting net10.0, with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<RootNamespace>Disable_Table_detection</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<!-- Floating version ("*"): the package version is chosen by NuGet at restore time. -->
<!-- NOTE(review): consider pinning an explicit version so builds are reproducible. -->
<PackageReference Include="Syncfusion.SmartDataExtractor.Net.Core" Version="*" />
</ItemGroup>

<!-- The sample input PDF and the Output folder placeholder are copied to the build output directory on every build. -->
<ItemGroup>
<None Update="Data\Input.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Output\.gitkeep">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
using System.IO;
using Syncfusion.Pdf.Parsing;
using Syncfusion.SmartDataExtractor;

namespace DisableTableDetection
{
    /// <summary>
    /// Sample that turns table detection off, extracts data from
    /// <c>Data/Input.pdf</c> and writes the result to <c>Output/Output.pdf</c>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point: disables table detection, runs the extractor and saves the result.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Compose paths with Path.Combine; '\' is an ordinary file-name character
            // on Linux and macOS, so the original literals would not resolve there.
            string pdfInput = Path.Combine("Data", "Input.pdf");
            string pdfOutputFolder = "Output";
            string pdfOutput = Path.Combine(pdfOutputFolder, "Output.pdf");

            // Guarantee the output folder exists before saving into it.
            Directory.CreateDirectory(pdfOutputFolder);

            // Load the input PDF file.
            using (FileStream stream = new FileStream(pdfInput, FileMode.Open, FileAccess.Read))
            {
                // Initialize the Smart Data Extractor.
                DataExtractor extractor = new DataExtractor();

                // Disable table detection for this extraction.
                // By default this property is true.
                extractor.EnableTableDetection = false;

                // Extract data and return as a loaded PDF document.
                PdfLoadedDocument pdf = extractor.ExtractDataAsPdfDocument(stream);
                try
                {
                    // Save the extracted output as a new PDF file.
                    pdf.Save(pdfOutput);
                }
                finally
                {
                    // Close the document even if saving throws, to release its resources.
                    pdf.Close(true);
                }
            }
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<Solution>
<!-- Solution containing the single sample project that extracts data as JSON from a PDF document. -->
<Project Path="Extract-data-as-JSON-from-PDF-document/Extract-data-as-JSON-from-PDF-document.csproj" />
</Solution>
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Console executable targeting net10.0, with implicit usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net10.0</TargetFramework>
<RootNamespace>Extract_data_as_JSON_from_PDF_document</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<!-- Floating version ("*"): the package version is chosen by NuGet at restore time. -->
<!-- NOTE(review): consider pinning an explicit version so builds are reproducible. -->
<PackageReference Include="Syncfusion.SmartDataExtractor.Net.Core" Version="*" />
</ItemGroup>

<!-- The sample input PDF and the Output folder placeholder are copied to the build output directory on every build. -->
<ItemGroup>
<None Update="Data\Input.pdf">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Output\.gitkeep">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
Loading
Loading