PDFsharp & MigraDoc Foundation
https://forum.pdfsharp.net/

Trying to write PDF Passthrough where one image is removed
https://forum.pdfsharp.net/viewtopic.php?f=2&t=3069
Page 1 of 1

Author:  robbsadler [ Mon Mar 02, 2015 10:49 pm ]
Post subject:  Trying to write PDF Passthrough where one image is removed

Using GDI+ version, v1.32 (from latest nuget package)

I have seen lots of examples of extracting images, but I need to:
- find an image in a PDF (I can use size and a snippet of the value to match it)
- write out the entire pdf except the image I found.

This seems like it should be very simple, but I have plugged all sorts of things into the existing samples to no avail.

Can someone point me in the right direction?

Thanks for any assistance!

Robb

This code finds my images, but I am not sure what to do with them.

Code:
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using PdfSharp.Pdf;
using PdfSharp.Pdf.Advanced;
using PdfSharp.Pdf.IO;

namespace ReplaceRemoveImage
{
    class Program
    {
        static void Main(string[] args)
        {
            // Get a fresh copy of the sample PDF file
            string inFile = @"c:\in.pdf";
            string outFile = @"c:\out.pdf";
            File.Copy(inFile, outFile, true);

            // Read document into memory for modification
            PdfDocument document = PdfReader.Open(outFile);

            int imageCount = 0;
           
            PdfPage page = document.Pages[0];

            // Get resources dictionary
            PdfDictionary resources = page.Elements.GetDictionary("/Resources");
            if (resources != null)
            {
                // Get external objects dictionary
                PdfDictionary xObjects = resources.Elements.GetDictionary("/XObject");
                if (xObjects != null)
                {
                    ICollection<PdfItem> items = xObjects.Elements.Values;
                    // Iterate references to external objects
                    foreach (PdfItem item in items)
                    {
                        PdfReference reference = item as PdfReference;
                        if (reference != null)
                        {
                            PdfDictionary xObject = reference.Value as PdfDictionary;
                            // Is external object an image?
                            if (xObject != null && xObject.Elements.GetString("/Subtype") == "/Image")
                            {
                                //if (xObject.Stream.Value.Length)
                                Console.WriteLine("Found image with length of {0}", xObject.Stream.Value.Length);
                            }
                        }
                    }
                }
            }
           
            var pdfObjects = document.Internals.GetAllObjects();

            foreach (PdfObject pdfObject in pdfObjects)
            {
                Console.WriteLine("Found object {0}, {1} indirect:{2}",
                    pdfObject.Internals.ObjectID, pdfObject.Internals.ObjectNumber,
                    pdfObject.IsIndirect);
            }

            // Using PDFsharp we never deal with object numbers. We simply put the
            // objects together and PDFsharp does the rest.

            // Save the document...
            document.Save(outFile);

            Console.ReadKey();
            // ...and start a viewer.
            //Process.Start(outFile);
        }

        //static void ExportImage(PdfDictionary image, ref int count)
        //{
        //    string filter = image.Elements["/Filter"].ToString();

        //    // this filter value for my PDF always contained "[ /ASCII85Decode /FlateDecode /DCTDecode ]"
        //    // the subsequent saved image is unreadable
        //    if (filter.Contains("/DCTDecode"))
        //    {
        //        ExportJpegImage(image, ref count);
        //    }
        //    else if (filter.Contains("/FlateDecode"))
        //    {
        //        ExportAsPngImage(image, ref count);
        //    }
        //}

        //static void ExportJpegImage(PdfDictionary image, ref int count)
        //{
        //    // Fortunately JPEG has native support in PDF and exporting an image is just writing the stream to a file.
        //    byte[] stream = image.Stream.Value;
        //    FileStream fs = new FileStream(String.Format(@"c:\Image{0}.jpeg", count++), FileMode.Create, FileAccess.Write);
        //    BinaryWriter bw = new BinaryWriter(fs);
        //    bw.Write(stream);
        //    bw.Close();
        //}

        //static void ExportAsPngImage(PdfDictionary image, ref int count)
        //{
        //    int width = image.Elements.GetInteger(PdfImage.Keys.Width);
        //    int height = image.Elements.GetInteger(PdfImage.Keys.Height);
        //    int bitsPerComponent = image.Elements.GetInteger(PdfImage.Keys.BitsPerComponent);

        //    // TODO: You can put the code here that converts vom PDF internal image format to a Windows bitmap
        //    // and use GDI+ to save it in PNG format.
        //    // It is the work of a day or two for the most important formats. Take a look at the file
        //    // PdfSharp.Pdf.Advanced/PdfImage.cs to see how we create the PDF image formats.
        //    // We don't need that feature at the moment and therefore will not implement it.
        //    // If you write the code for exporting images I would be pleased to publish it in a future release
        //    // of PDFsharp.
        //}


    }
}

Author:  robbsadler [ Wed Mar 04, 2015 5:28 pm ]
Post subject:  Re: Trying to write PDF Passthrough where one image is removed

I see most posts are answered pretty quickly. Did I post this poorly, or is it a bad time?

If it's not possible, that would be great to know.

Thanks for a great tool.

Thanks!

Page 1 of 1 All times are UTC
Powered by phpBB® Forum Software © phpBB Group
https://www.phpbb.com/