I am using the GDI+ build of PDFsharp, v1.32 (from the latest NuGet package).
I have seen lots of examples of extracting images, but I need to:
- find an image in a PDF (I can use size and a snippet of the value to match it)
- write out the entire PDF except the image I found.
This seems like it should be very simple, and I have plugged all sorts of things into existing samples, to no avail.
Can someone point me in the right direction?
Thanks for any assistance!
Robb
This code finds my images, but I am not sure what to do with them.
Code:
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using PdfSharp.Pdf;
using PdfSharp.Pdf.Advanced;
using PdfSharp.Pdf.IO;
namespace ReplaceRemoveImage
{
    /// <summary>
    /// Console sample that copies a PDF, reports the image XObjects referenced by
    /// its pages, lists every object in the document, and saves the copy again.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Copies <c>c:\in.pdf</c> to <c>c:\out.pdf</c>, opens the copy, prints the
        /// stream length of each image found on each page, prints the ID of every
        /// object in the document, saves the document and waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Get a fresh copy of the sample PDF file
            string inFile = @"c:\in.pdf";
            string outFile = @"c:\out.pdf";
            File.Copy(inFile, outFile, true);

            // Read document into memory for modification
            PdfDocument document = PdfReader.Open(outFile);

            // Scan every page rather than only the first one, so images on later
            // pages are reported too and a document without pages does not throw.
            foreach (PdfPage page in document.Pages)
            {
                foreach (PdfDictionary image in FindImages(page))
                {
                    // An image dictionary without stream data is reported with length 0
                    // instead of failing with a NullReferenceException.
                    int length = 0;
                    if (image.Stream != null && image.Stream.Value != null)
                    {
                        length = image.Stream.Value.Length;
                    }

                    // This is the place to match a specific image, e.g. by its length.
                    Console.WriteLine("Found image with length of {0}", length);
                }
            }

            PdfObject[] pdfObjects = document.Internals.GetAllObjects();
            foreach (PdfObject pdfObject in pdfObjects)
            {
                Console.WriteLine("Found object {0}, {1} indirect:{2}",
                    pdfObject.Internals.ObjectID, pdfObject.Internals.ObjectNumber,
                    pdfObject.IsIndirect);
            }

            // Using PDFsharp we never deal with object numbers. We simply put the
            // objects together and PDFsharp does the rest.

            // Save the document...
            document.Save(outFile);
            Console.ReadKey();

            // ...and start a viewer.
            //Process.Start(outFile);
        }

        /// <summary>
        /// Enumerates the image XObjects referenced by the resources of a page.
        /// </summary>
        /// <param name="page">The page whose <c>/Resources</c> dictionary is inspected.</param>
        /// <returns>
        /// Each dictionary reachable through an indirect reference in the page's
        /// <c>/XObject</c> dictionary whose <c>/Subtype</c> is <c>/Image</c>. The
        /// sequence is empty when the page has no resources or no XObjects.
        /// </returns>
        static IEnumerable<PdfDictionary> FindImages(PdfPage page)
        {
            // Get resources dictionary
            PdfDictionary resources = page.Elements.GetDictionary("/Resources");
            if (resources == null)
            {
                yield break;
            }

            // Get external objects dictionary
            PdfDictionary xObjects = resources.Elements.GetDictionary("/XObject");
            if (xObjects == null)
            {
                yield break;
            }

            // Iterate references to external objects
            foreach (PdfItem item in xObjects.Elements.Values)
            {
                PdfReference reference = item as PdfReference;
                if (reference == null)
                {
                    continue;
                }

                // Is external object an image?
                PdfDictionary xObject = reference.Value as PdfDictionary;
                if (xObject != null && xObject.Elements.GetString("/Subtype") == "/Image")
                {
                    yield return xObject;
                }
            }
        }

        //static void ExportImage(PdfDictionary image, ref int count)
        //{
        //    string filter = image.Elements["/Filter"].ToString();
        //    // this filter value for my PDF always contained "[ /ASCII85Decode /FlateDecode /DCTDecode ]"
        //    // the subsequent saved image is unreadable
        //    if (filter.Contains("/DCTDecode"))
        //    {
        //        ExportJpegImage(image, ref count);
        //    }
        //    else if (filter.Contains("/FlateDecode"))
        //    {
        //        ExportAsPngImage(image, ref count);
        //    }
        //}

        //static void ExportJpegImage(PdfDictionary image, ref int count)
        //{
        //    // Fortunately JPEG has native support in PDF and exporting an image is just writing the stream to a file.
        //    byte[] stream = image.Stream.Value;
        //    FileStream fs = new FileStream(String.Format(@"c:\Image{0}.jpeg", count++), FileMode.Create, FileAccess.Write);
        //    BinaryWriter bw = new BinaryWriter(fs);
        //    bw.Write(stream);
        //    bw.Close();
        //}

        //static void ExportAsPngImage(PdfDictionary image, ref int count)
        //{
        //    int width = image.Elements.GetInteger(PdfImage.Keys.Width);
        //    int height = image.Elements.GetInteger(PdfImage.Keys.Height);
        //    int bitsPerComponent = image.Elements.GetInteger(PdfImage.Keys.BitsPerComponent);
        //    // TODO: You can put the code here that converts from PDF internal image format to a Windows bitmap
        //    // and use GDI+ to save it in PNG format.
        //    // It is the work of a day or two for the most important formats. Take a look at the file
        //    // PdfSharp.Pdf.Advanced/PdfImage.cs to see how we create the PDF image formats.
        //    // We don't need that feature at the moment and therefore will not implement it.
        //    // If you write the code for exporting images I would be pleased to publish it in a future release
        //    // of PDFsharp.
        //}
    }
}