New to Telerik Document Processing? Download free 30-day trial

Summarizing the Text Content of PDF Documents using Text Analytics with Azure AI services

Environment

Version Product Author
Q1 2025 RadPdfProcessing Desislava Yordanova

Description

Learn how to summarize the text content of a PDF document using Text Analytics with Azure AI services.

Solution

Follow the steps:

1. Add the required assemblies/NuGet packages to your project. The code below relies on the Azure.AI.TextAnalytics package and on the RadPdfProcessing assemblies that provide the Telerik.Windows.Documents.Fixed namespaces.

2. Generate your Azure AI key and endpoint. For details, see "Get your credentials from your Azure AI services resource".

Azure AI key

3. Extract the text content from a PDF document.

4. Use the custom AzureTextSummarizationProvider implementation to summarize the text content extracted in step 3. The following example shows both step 3 and step 4:

        /// <summary>
        /// Imports "PdfDocument.pdf", exports its content as plain text, summarizes that text with
        /// <see cref="AzureTextSummarizationProvider"/>, and writes the result to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            Telerik.Windows.Documents.Fixed.FormatProviders.Pdf.PdfFormatProvider pdfProvider = new PdfFormatProvider();
            Telerik.Windows.Documents.Fixed.FormatProviders.Text.TextFormatProvider textProvider = new TextFormatProvider();

            // Step 3: import the PDF (the import is given a 10-second timeout) and export it as plain text.
            Telerik.Windows.Documents.Fixed.Model.RadFixedDocument document = pdfProvider.Import(File.ReadAllBytes("PdfDocument.pdf"), TimeSpan.FromSeconds(10));
            string documentTextContent = textProvider.Export(document);

            // Step 4: summarize the extracted text.
            // NOTE: azure_key and azure_endpoint are not declared in this snippet; they are expected to be
            // defined elsewhere and to hold the credentials generated in step 2.
            AzureTextSummarizationProvider summarizationProvider = new AzureTextSummarizationProvider(azure_key, azure_endpoint);

            // Main is synchronous, so the call has to block here. GetAwaiter().GetResult() is used instead of
            // .Result so that a failure surfaces as the original exception rather than an AggregateException.
            string summary = summarizationProvider.SummarizeText(documentTextContent).GetAwaiter().GetResult();

            Console.WriteLine(summary);
        }

        /// <summary>
        /// Summarizes plain text by running an extractive summarization action through the
        /// Azure AI Text Analytics client.
        /// </summary>
        public class AzureTextSummarizationProvider
        {
            private readonly string languageKey;
            private readonly string languageEndpoint;

            /// <summary>
            /// Creates a provider that uses the given Azure AI credentials.
            /// </summary>
            /// <param name="azure_key">The key used to build the <c>AzureKeyCredential</c>.</param>
            /// <param name="azure_endpoint">The endpoint address; it is parsed as a <see cref="Uri"/> when a summary is requested.</param>
            /// <exception cref="ArgumentNullException">
            /// <paramref name="azure_key"/> or <paramref name="azure_endpoint"/> is <c>null</c>.
            /// </exception>
            public AzureTextSummarizationProvider(string azure_key, string azure_endpoint)
            {
                // Fail fast: a null value would otherwise only surface later, inside SummarizeText.
                this.languageKey = azure_key ?? throw new ArgumentNullException(nameof(azure_key));
                this.languageEndpoint = azure_endpoint ?? throw new ArgumentNullException(nameof(azure_endpoint));
            }

            /// <summary>
            /// Sends <paramref name="text"/> to the service as a single document, waits for the
            /// extractive summarization operation to complete, and builds a textual report.
            /// </summary>
            /// <param name="text">The text to summarize.</param>
            /// <returns>
            /// A report containing the operation metadata (creation time, expiration time, id, status)
            /// followed by the extracted sentences, or by the error code and message for any action or
            /// document that reported an error.
            /// </returns>
            public async Task<string> SummarizeText(string text)
            {
                Azure.AzureKeyCredential credentials = new Azure.AzureKeyCredential(languageKey);
                Uri endpoint = new Uri(languageEndpoint);

                Azure.AI.TextAnalytics.TextAnalyticsClient client = new Azure.AI.TextAnalytics.TextAnalyticsClient(endpoint, credentials);

                // Prepare analyze operation input. Multiple documents can be added to this list to run
                // the same operation on all of them.
                List<string> batchInput = new List<string>
                {
                    text
                };

                Azure.AI.TextAnalytics.TextAnalyticsActions actions = new Azure.AI.TextAnalytics.TextAnalyticsActions()
                {
                    ExtractiveSummarizeActions = [new Azure.AI.TextAnalytics.ExtractiveSummarizeAction()]
                };

                // Start the analysis and wait for it to finish.
                // ConfigureAwait(false): nothing below needs the caller's synchronization context, and
                // not capturing it avoids a deadlock when a caller blocks on the returned task.
                Azure.AI.TextAnalytics.AnalyzeActionsOperation operation =
                    await client.StartAnalyzeActionsAsync(batchInput, actions).ConfigureAwait(false);
                await operation.WaitForCompletionAsync().ConfigureAwait(false);

                System.Text.StringBuilder stringBuilder = new System.Text.StringBuilder();

                // Operation status.
                stringBuilder.AppendLine("AnalyzeActions operation has completed");
                stringBuilder.AppendLine();

                stringBuilder.AppendLine($"Created On   : {operation.CreatedOn}");
                stringBuilder.AppendLine($"Expires On   : {operation.ExpiresOn}");
                stringBuilder.AppendLine($"Id           : {operation.Id}");
                stringBuilder.AppendLine($"Status       : {operation.Status}");

                stringBuilder.AppendLine();

                // Operation results, enumerated page by page.
                await foreach (Azure.AI.TextAnalytics.AnalyzeActionsResult documentsInPage in operation.Value.ConfigureAwait(false))
                {
                    IReadOnlyCollection<Azure.AI.TextAnalytics.ExtractiveSummarizeActionResult> summaryResults = documentsInPage.ExtractiveSummarizeResults;

                    foreach (Azure.AI.TextAnalytics.ExtractiveSummarizeActionResult summaryActionResults in summaryResults)
                    {
                        // An action-level error is reported and the action is skipped.
                        if (summaryActionResults.HasError)
                        {
                            stringBuilder.AppendLine("  Error!");
                            stringBuilder.AppendLine($"  Action error code: {summaryActionResults.Error.ErrorCode}.");
                            stringBuilder.AppendLine($"  Message: {summaryActionResults.Error.Message}");
                            continue;
                        }

                        foreach (Azure.AI.TextAnalytics.ExtractiveSummarizeResult documentResults in summaryActionResults.DocumentsResults)
                        {
                            // A document-level error is reported and the document is skipped.
                            if (documentResults.HasError)
                            {
                                stringBuilder.AppendLine("  Error!");
                                stringBuilder.AppendLine($"  Document error code: {documentResults.Error.ErrorCode}.");
                                stringBuilder.AppendLine($"  Message: {documentResults.Error.Message}");
                                continue;
                            }

                            stringBuilder.AppendLine($"  Extracted the following {documentResults.Sentences.Count} sentence(s):");
                            stringBuilder.AppendLine();

                            // The extracted sentences are joined on one line, each followed by a space.
                            foreach (Azure.AI.TextAnalytics.ExtractiveSummarySentence sentence in documentResults.Sentences)
                            {
                                stringBuilder.Append($"{sentence.Text} ");
                            }
                        }
                    }
                }

                return stringBuilder.ToString();
            }
        }

See Also

In this article