A Rust library for PDF document manipulation.

Junfeng Liu

Last update: Dec 30, 2022

Related tags

Overview

lopdf

A Rust library for PDF document manipulation.

Example Code

Create PDF document

#[macro_use]
extern crate lopdf;
use lopdf::{Document, Object, Stream};
use lopdf::content::{Content, Operation};

// Start a new, empty document that declares PDF version 1.5.
let mut doc = Document::with_version("1.5");
// Reserve the id of the "Pages" node up front: the page created below stores it as
// its "Parent" before the "Pages" dictionary itself is built and inserted.
let pages_id = doc.new_object_id();
// Font dictionary describing Courier as a Type1 font.
let font_id = doc.add_object(dictionary! {
    "Type" => "Font",
    "Subtype" => "Type1",
    "BaseFont" => "Courier",
});
// Resource dictionary that exposes the font above under the name "F1".
let resources_id = doc.add_object(dictionary! {
    "Font" => dictionary! {
        "F1" => font_id,
    },
});
// Operations of the page's content stream. In PDF content-stream syntax, BT/ET
// delimit a text object, Tf selects font "F1" at size 48, Td moves the text
// position to (100, 600) and Tj shows the string.
let content = Content {
    operations: vec![
        Operation::new("BT", vec![]),
        Operation::new("Tf", vec!["F1".into(), 48.into()]),
        Operation::new("Td", vec![100.into(), 600.into()]),
        Operation::new("Tj", vec![Object::string_literal("Hello World!")]),
        Operation::new("ET", vec![]),
    ],
};
// Encode the operations and store them as a stream object with an empty dictionary.
// `unwrap` panics if encoding fails.
let content_id = doc.add_object(Stream::new(dictionary! {}, content.encode().unwrap()));
// The single page: it points back to the "Pages" node and to its content stream.
let page_id = doc.add_object(dictionary! {
    "Type" => "Page",
    "Parent" => pages_id,
    "Contents" => content_id,
});
// The "Pages" node lists its kids and their count. "Resources" and "MediaBox" are
// set here rather than on the page itself.
// NOTE(review): this relies on page-tree attribute inheritance as described by the
// PDF specification — confirm if pages need per-page values.
let pages = dictionary! {
    "Type" => "Pages",
    "Kids" => vec![page_id.into()],
    "Count" => 1,
    "Resources" => resources_id,
    "MediaBox" => vec![0.into(), 0.into(), 595.into(), 842.into()],
};
// Insert the "Pages" dictionary under the id reserved at the top.
doc.objects.insert(pages_id, Object::Dictionary(pages));
// The catalog references the "Pages" node and is registered as the trailer's "Root".
let catalog_id = doc.add_object(dictionary! {
    "Type" => "Catalog",
    "Pages" => pages_id,
});
doc.trailer.set("Root", catalog_id);
// Compress the document, then write it to "example.pdf"; `unwrap` panics if saving fails.
doc.compress();
doc.save("example.pdf").unwrap();

Merge PDF documents

#[macro_use]
extern crate lopdf;

use std::collections::BTreeMap;

use lopdf::content::{Content, Operation};
use lopdf::{Document, Object, ObjectId, Stream, BookMark};

/// Builds an in-memory, single-page PDF 1.5 document showing "Hello World!".
///
/// The page carries its own "Resources" and "MediaBox" entries, and the returned
/// document has its trailer "Root" pointing at a freshly created catalog. Nothing
/// is written to disk.
///
/// # Panics
///
/// Panics if encoding the page content fails.
pub fn generate_fake_document() -> Document {
    let mut pdf = Document::with_version("1.5");

    // Reserved before anything else so the page can name it as its "Parent".
    let page_tree_id = pdf.new_object_id();

    let courier_id = pdf.add_object(dictionary! {
        "Type" => "Font",
        "Subtype" => "Type1",
        "BaseFont" => "Courier",
    });
    let font_map = dictionary! {
        "F1" => courier_id,
    };
    let resources_id = pdf.add_object(dictionary! {
        "Font" => font_map,
    });

    // Text-drawing operations for the page's content stream.
    let text_ops = vec![
        Operation::new("BT", vec![]),
        Operation::new("Tf", vec!["F1".into(), 48.into()]),
        Operation::new("Td", vec![100.into(), 600.into()]),
        Operation::new("Tj", vec![Object::string_literal("Hello World!")]),
        Operation::new("ET", vec![]),
    ];
    let encoded = Content { operations: text_ops }.encode().unwrap();
    let stream_id = pdf.add_object(Stream::new(dictionary! {}, encoded));

    let leaf_id = pdf.add_object(dictionary! {
        "Type" => "Page",
        "Parent" => page_tree_id,
        "Contents" => stream_id,
        "Resources" => resources_id,
        "MediaBox" => vec![0.into(), 0.into(), 595.into(), 842.into()],
    });

    // Fill in the page tree root under the id reserved above.
    let page_tree = dictionary! {
        "Type" => "Pages",
        "Kids" => vec![leaf_id.into()],
        "Count" => 1,
    };
    pdf.objects.insert(page_tree_id, Object::Dictionary(page_tree));

    let root_id = pdf.add_object(dictionary! {
        "Type" => "Catalog",
        "Pages" => page_tree_id,
    });
    pdf.trailer.set("Root", root_id);

    pdf
}

/// Merges four generated documents into a single `merged.pdf`.
///
/// The steps are:
/// 1. Renumber every source document so object ids do not collide, and collect its
///    pages and objects. The first page of each source document gets a bookmark.
/// 2. Copy all objects except "Catalog", "Pages", "Page", "Outlines" and "Outline"
///    into the merged document; keep one "Catalog" and one merged "Pages" object.
/// 3. Re-parent every page to the kept "Pages" object and rebuild its "Kids"/"Count".
/// 4. Renumber the merged document, build the outline from the bookmarks, attach it
///    to the catalog, compress and save.
///
/// Prints a message and returns early if no "Pages" or no "Catalog" object was found.
///
/// # Panics
///
/// Panics if a page id reported by `get_pages()` cannot be resolved, or if saving
/// `merged.pdf` fails.
fn main() {
    // Generate a stack of Documents to merge
    let documents = vec![
        generate_fake_document(),
        generate_fake_document(),
        generate_fake_document(),
        generate_fake_document(),
    ];

    // Define a starting max_id (will be used as start index for object_ids)
    let mut max_id = 1;
    let mut pagenum = 1;
    // Collect all Documents Objects grouped by a map
    let mut documents_pages = BTreeMap::new();
    let mut documents_objects = BTreeMap::new();
    let mut document = Document::with_version("1.5");

    for mut doc in documents {
        // Becomes true once this source document has received its bookmark.
        let mut first = false;
        doc.renumber_objects_with(max_id);

        max_id = doc.max_id + 1;

        documents_pages.extend(
            doc.get_pages()
                .into_iter()
                .map(|(_, object_id)| {
                    if !first {
                        // `format!` already yields a `String`; no extra conversion needed.
                        let bookmark = BookMark::new(
                            format!("Page_{}", pagenum),
                            [0.0, 0.0, 1.0],
                            0,
                            object_id,
                        );
                        document.add_bookmark(bookmark, None);
                        first = true;
                        pagenum += 1;
                    }

                    (
                        object_id,
                        doc.get_object(object_id).unwrap().to_owned(),
                    )
                })
                .collect::<BTreeMap<ObjectId, Object>>(),
        );
        documents_objects.extend(doc.objects);
    }

    // Catalog and Pages are mandatory
    let mut catalog_object: Option<(ObjectId, Object)> = None;
    let mut pages_object: Option<(ObjectId, Object)> = None;

    // Process all objects except "Page" type
    for (object_id, object) in documents_objects.iter() {
        // We have to ignore "Page" (as are processed later), "Outlines" and "Outline" objects
        // All other objects should be collected and inserted into the main Document
        match object.type_name().unwrap_or("") {
            "Catalog" => {
                // Keep the id of the first "Catalog" seen and use it for the future "Pages"
                catalog_object = Some((
                    if let Some((id, _)) = catalog_object {
                        id
                    } else {
                        *object_id
                    },
                    object.clone(),
                ));
            }
            "Pages" => {
                // Keep the id of the first "Pages" seen and use it for the future "Catalog"
                // We have also to merge all dictionaries of the old and the new "Pages" object
                if let Ok(dictionary) = object.as_dict() {
                    let mut dictionary = dictionary.clone();
                    if let Some((_, ref object)) = pages_object {
                        if let Ok(old_dictionary) = object.as_dict() {
                            dictionary.extend(old_dictionary);
                        }
                    }

                    pages_object = Some((
                        if let Some((id, _)) = pages_object {
                            id
                        } else {
                            *object_id
                        },
                        Object::Dictionary(dictionary),
                    ));
                }
            }
            "Page" => {}     // Ignored, processed later and separately
            "Outlines" => {} // Ignored, not supported yet
            "Outline" => {}  // Ignored, not supported yet
            _ => {
                document.objects.insert(*object_id, object.clone());
            }
        }
    }

    // If no "Pages" found abort
    let pages_object = match pages_object {
        Some(found) => found,
        None => {
            println!("Pages root not found.");

            return;
        }
    };

    // Iter over all "Page" and collect with the parent "Pages" created before
    for (object_id, object) in documents_pages.iter() {
        if let Ok(dictionary) = object.as_dict() {
            let mut dictionary = dictionary.clone();
            dictionary.set("Parent", pages_object.0);

            document
                .objects
                .insert(*object_id, Object::Dictionary(dictionary));
        }
    }

    // If no "Catalog" found abort
    let catalog_object = match catalog_object {
        Some(found) => found,
        None => {
            println!("Catalog root not found.");

            return;
        }
    };

    // Build a new "Pages" with updated fields
    if let Ok(dictionary) = pages_object.1.as_dict() {
        let mut dictionary = dictionary.clone();

        // Set new pages count
        dictionary.set("Count", documents_pages.len() as u32);

        // Set new "Kids" list (collected from documents pages) for "Pages"
        dictionary.set(
            "Kids",
            documents_pages
                .into_iter()
                .map(|(object_id, _)| Object::Reference(object_id))
                .collect::<Vec<_>>(),
        );

        document
            .objects
            .insert(pages_object.0, Object::Dictionary(dictionary));
    }

    // Build a new "Catalog" with updated fields
    if let Ok(dictionary) = catalog_object.1.as_dict() {
        let mut dictionary = dictionary.clone();
        dictionary.set("Pages", pages_object.0);
        dictionary.remove(b"Outlines"); // Outlines not supported in merged PDFs

        document
            .objects
            .insert(catalog_object.0, Object::Dictionary(dictionary));
    }

    document.trailer.set("Root", catalog_object.0);

    // Update the max internal ID as wasn't updated before due to direct objects insertion
    document.max_id = document.objects.len() as u32;

    // Reorder all new Document objects
    document.renumber_objects();

    // Set any Bookmarks to the First child if they are not set to a page
    document.adjust_zero_pages();

    // Set all bookmarks to the PDF Object tree then set the Outlines to the Bookmark content map.
    if let Some(outline_id) = document.build_outline() {
        // `renumber_objects()` above may have assigned the catalog a new id, which would
        // make `catalog_object.0` stale. Resolve the catalog through the trailer's "Root"
        // reference instead, and fall back to the original id only if that lookup fails.
        let root_id = document
            .trailer
            .get(b"Root")
            .and_then(|root| root.as_reference())
            .unwrap_or(catalog_object.0);

        if let Ok(Object::Dictionary(dict)) = document.get_object_mut(root_id) {
            dict.set("Outlines", Object::Reference(outline_id));
        }
    }

    document.compress();

    // Save the merged PDF
    document.save("merged.pdf").unwrap();
}

Modify PDF document

// Load an existing PDF; `?` propagates a load failure to the caller.
let mut doc = Document::load("example.pdf")?;
// Declare the document as PDF version 1.4.
doc.version = "1.4".to_string();
// Replace the text on page 1. The call is fallible, so propagate its error with `?`
// like the surrounding calls instead of silently discarding the result.
doc.replace_text(1, "Hello World!", "Modified text!")?;
// Write the modified document to a new file.
doc.save("modified.pdf")?;

FAQ

Why keep everything in memory as high-level objects until finally serializing the entire document?

Normally a PDF document won't be very large, ranging from tens of KB to hundreds of MB. Memory size is not a bottleneck for today's computers. By keeping the whole document in memory, stream lengths can be pre-calculated, so there is no need to use a reference object for the Length entry; the resulting PDF file is smaller for distribution and faster for PDF consumers to process.

Producing a PDF is a one-time effort, while it is consumed many times.

Comments

[Question] Looking for a crate to parse and extract content from PDF

First of all: really nice work! I am looking for a Rust crate to parse a PDF and extract the content from it. For example, to extract each text line and metadata from the first page, to get which font style and/or font family belongs to the given line, etc.

I am asking if this crate supports a good depth of extraction (for example, if the crate supports already font family and font size extraction for a single line, or word, things like that...). Can you give me some information about that please?

Thanks a lot

opened by k0pernicus 11
Parsing of all objects on load is slow
I've done a bit of comparison benchmarking for extracting URLs from PDF files.

Test file: PDF 1.7 specification https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf

(This file is encrypted with an empty password, that is another feature I would like to bring to lopdf. My code using lopdf cannot yet unscramble strings.)

Using PyPDF2, the process takes 2.7s while just loading the file takes 43s using lopdf. PyPDF2 has some stability / looping issues that are a no-go for me. I would like to improve lopdf performance by a lot.

On load, most of the time is spent in the pom parser in parser.rs.

I see three approaches that would be workable:

Make the parser a LOT faster somehow. This keeps the current API and objects can be accessed immutably in parallel code.

Do on demand parsing of objects. Somewhat complex, can introduce mutability issues, might break the API.

Parallelize the parsing using rayon. Would bring a performance gain, but would still consume the same amount of CPU time. Are you open to adding rayon as a dependency ?

From a design perspective, what would be the approaches that make the most sense to you ? I have some time to spend on improving lopdf over the coming weeks.
opened by jothan 9

image not found in `lopdf::xobject`

Hi. I am trying to execute that example todged from [1]

use lopdf::Document;
use lopdf::xobject;

// Reproduction from the issue report: loads "example.pdf", tries to embed
// "image.png" on a page and saves the result as "image.pdf".
// NOTE(review): `?` is used although `main` declares no `Result` return type, and
// `page_id` is never defined in this snippet — both would need fixing before it compiles.
// NOTE(review): the release notes say the image dependency was moved to the
// `embed_image` feature; presumably `xobject::image` is only available with that
// feature enabled — confirm against the crate's Cargo features.
fn main() {
    let mut document = Document::load("example.pdf")?;
    document.version = "1.4".to_string();
    
    // If the stream is loaded correctly
    if let Ok(stream) = xobject::image("image.png") {
        // we need a "page_id", the position coordinates and the size of the image
        document.insert_image(page_id, stream, (50.0, 100.0), (800.0, 500.0));
    }
    
    document.save("image.pdf")?;
}

but I get this error (that image does not exist in xobject):

if let Ok(stream) = lopdf::xobject::image("image.png") {
   |                                       ^^^^^ not found in `lopdf::xobject`

Inspecting the code I see that if it exists [2].

Someone could give me a help with this

Jose

[1] https://www.dariocancelliere.it/blog/2020/09/29/pdf-manipulation-with-rust-and-considerations [2] https://github.com/J-F-Liu/lopdf/blob/fec4729625312da00243b58db2169168fe4ed65d/src/xobject.rs#L29

opened by josejachuf 8

Cannot read pdf document

When I try to read this document: http://mirror.hmc.edu/ctan/macros/latex/contrib/iwhdp/Back_2015.pdf I get:

Not a valid PDF file (prev xref_and_trailer). Mismatch { message: "expect repeat at least 1 times, found 0 times", position: 3117 }

opened by manfredlotz 8
lopdf 0.28.0 breaks something.

So I was updating and trying out 0.28.0 of lopdf the newest update and noticed when i merge a ton of pdf's together that I use to merge with 0.27 that it now is not a workable PDF. IDK exactly what caused the issue but i can try and look at the changes to see what it could be. But this is a major problem if it can no longer merge adobe PDF files together.

I also saw a noticeable size difference between the PDFs as well will diff them to see what might of changed. old 27,448kb new 27,433kb

opened by genusistimelord 5
Object::as_datetime() has issues
While working on tests for rsvg-convert (a tool that's built with librsvg), I found several issues with the Object::as_datetime() implementation. Maybe I'm just using this incorrectly or it's some other misunderstanding, but I would like to raise the issues here.

Timezone handling

The implementation uses chrono::Local.datetime_from_str(). This method will return an error (ParseError::Impossible)) if the timezone offset in the string doesn't match the local timezone offset. So while this may work for PDF files that are created in the local timezone, it is likely going to fail quite often.

The chrono crate offers another method which is DateTime::parse_from_str(): https://docs.rs/chrono/0.3.0/chrono/datetime/struct.DateTime.html#method.parse_from_str This method seems more appropriate as it can handle different timezone offsets.

In my opinion it would also make sense to consider changing the return value of Object::as_datetime() to return a DateTime in the UTC offset instead of using Local.

Parsing of incomplete dates

I've run into problems because the PDF I tested did not specify the minutes of the timezone offset. So the CreationDate string looked like this:

D:20200211085039+00'

Instead of the proper

D:20200211085039+00'00'

While this was due to a bug in the library that created the PDF, it still seems valid according to the PDF spec. According to the spec all fields after the year are optional. However the code in Object::as_datetime() will raise ParseError(TooShort) unless the complete datetime string is given. So I think the parser should be changed to deal gracefully with the optional fields missing.
opened by svenfoo 5
Corrupt PDF when using the default example [0.6.0]

Hi, I made a completely new project and copied the example code.

I had to switch to rust nightly because #![feature(field_init_shorthand)] is not allowed on stable. I compiled and ran the get started guide. What I got was a corrupted PDF (I'll attach it if I can).

I do not know why the PDF is corrupted, sadly. It would be nice if someone could look into it.

Uploading example.pdf.txt…

opened by fschutt 5
Various improvements, updated libraries and image features

In this PR I updated all libraries, applied various code improvements, fixed tests in case of non enabled chrono feature and done some enrichments on the embed_image feature, now supports buffers.

I also added an useful function for the Document struct called get_object_page(), this function will return the Page ObjectId if found, useful for inserting certain element of another dependent ObjectId and another useful remove_object() function that is able to correctly remove an object from the page and the reference.

I also formatted the code with cargo fmt in order to be compliant to all Rust 2018 rules.

@J-F-Liu please wait before merging this, I'll to check if the embed_image is working properly because I think that is a little buggy. Maybe is not well tested as is not a default feature :-)

opened by Emulator000 4
Make sure Stream.start_position is relative to the whole file.
I noticed this while writing the nom parser.

This is to make sure this code works in reader.rs:

if let Some(start) = stream.start_position { let end = start + length as usize; stream.set_content(self.buffer[start..end].to_vec()); }
opened by jothan 4
Another panic

Another panic for http://ctan.math.washington.edu/tex-archive/macros/latex/contrib/bg/description.pdf

thread 'main' panicked at 'called Result::unwrap() on an Err value: FromUtf8Error { bytes: [139], error: Utf8Error { valid_up_to: 0, error_len: Some(1) } }', libcore/result.rs:945:5

opened by manfredlotz 4
Make chrono crate optional (use `time` directly)
The chrono crate is currently only used for providing a few impls for parsing PDF date objects. However, chrono is a pretty heavy dependency if it is only used for printing the date. The time crate (which chrono itself depends on anyways) is more than enough to do that job (and if it's absolutely necessary to parse date formats, this can also be done with external libraries, there is no need for lopdf to depend on chrono). chrono, however, brings in num-traits, num-*, etc. - very generics-heavy crates that aren't strictly needed for lopdf to work.

Compile time of lopdf:

chrono on: 19.87s

chrono off: 15.57s

By default this feature is on, to not break any code, being backwards-compatible. However, since in theory someone could have default-features = false in his Cargo.toml, this is still a "breaking change" in terms of semver.

The only drawback is that with time parsing, the weekday and year day isn't preserved / calculated (strptime doesn't auto-calculate those fields). But for crates that don't need this behaviour, this can be a significant improvement in terms of compile time.

This PR brings the dependency count from 30 to 19 dependencies (11 dependencies removed). If this is merged, I'd be happy if a new version would be released.
opened by fschutt 4
Text is not replaced in PDFs generated by Word

I have recreated the the existing example.pdf file with Word, i.e. I created a simple Word file with the text "Hello World!" then exported it as a PDF file. When trying to replace the text in this PDF (as per the provided example), nothing happens. It looks like the "Hello World!" text is not even there.

What would be a proper way to replace the text in a Word-generated PDF?

opened by vbocan 0

Invalid cross-reference table (could not parse xref)

/// Loads the PDF at `path`, compresses it and saves it next to the original as
/// `{path}__compressed`.
///
/// # Panics
///
/// Panics with the load error's message if the document cannot be loaded, and
/// (through `unwrap`) if saving the compressed copy fails.
fn shrink_file(path: &str) {
  let doc = Document::load(path);
  match doc {
    Ok(mut pdf) => {
      pdf.compress();
      pdf.save(format!("{path}__compressed")).unwrap();
    },
    // Loading failed: abort with the error text (this is the path hit in the report).
    Err(error) => {
      panic!("error {}", error.to_string())
    }
  };
  
}

Output

thread 'main' panicked at 'error Invalid cross-reference table (could not parse xref)', src/main.rs:36:7
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
 ELIFECYCLE  Command failed.
 ELIFECYCLE  Command failed with exit code 101.

opened by pheianox 1

Can't parse PDF with comments in content stream
# parser.rs:304 pub fn content(input: &[u8]) -> Option<Content<Vec<Operation>>> { (content_space() * operation().repeat(0..).map(|operations| Content { operations })) .parse(input) .ok() }

Returns only Some(Content { operations: [Operation { operator: "q", operands: [] }] }) for the attached sample PDF. It seems like it's not able to parse all the content correctly?

Payslip.pdf
opened by baarkerlounger 2
Add a documentation to PDFutils to replace_text and update readme.md

We should fix the documentation in lopdf/pdfutil/README.md and add some examples.
USAGE: pdfutil [OPTIONS] [SUBCOMMAND]

FLAGS: -h, --help Prints help information -V, --version Prints version information

OPTIONS: -i, --input
-o, --output

SUBCOMMANDS: compress Compress PDF document decompress Decompress PDF document delete_objects Delete objects delete_pages Delete pages delete_zero_length_streams Delete zero length stream objects extract_pages Extract pages extract_stream Extract stream content extract_text Extract text help Prints this message or the help of the given subcommand(s) print_streams Print streams process Process PDF document with specified operations prune_objects Prune unused objects renumber_objects Renumber objects replace_text Replace text

opened by yodatak 0

Releases(v0.20.0)

v0.20.0(Mar 7, 2019)
Replace println with log macros

Use Rust 2018

Use pom 3.0

Source code(tar.gz)
Source code(zip)
v0.19.0(Oct 24, 2018)
Allow xref section has zero entries, fixes #39

Dictionary key type changed to Vec<u8>, fixes #42

Move image dependency to embed_image feature

Source code(tar.gz)
Source code(zip)
v0.18.0(Oct 5, 2018)
Insert image on page.

Able to read stream when it's length is in object stream.

Source code(tar.gz)
Source code(zip)
v0.17.0(Sep 19, 2018)
Use time crate directly, chrono is optional.

insert_form_object accepts page_id argument.

Source code(tar.gz)
Source code(zip)
v0.16.0(Sep 18, 2018)

Add form xobject to page.
Source code(tar.gz)
Source code(zip)
v0.15.3(Sep 14, 2018)

Source code(tar.gz)
Source code(zip)
v0.15.0(Feb 4, 2018)
Extract text from specified pages.

Replace text of specified page.

Source code(tar.gz)
Source code(zip)
v0.14.1(Nov 4, 2017)
Derive Clone for Document and Xref

Add a dictionary! macro that creates a Dictionary

Add an Object::string_literal constructor

Add impl From<_> for Object for more types

Remove the Seek bound on Document::save_to

Source code(tar.gz)
Source code(zip)
v0.13.0(Oct 2, 2017)
Ignore invalid objects when reading all object in xref table

Store compressed stream objects and normal objects together

Minor bug fixes and improvements

Source code(tar.gz)
Source code(zip)
v0.11.0(Aug 21, 2017)

Use itoa and dtoa to improve writing performance (by sharazam).
Source code(tar.gz)
Source code(zip)
v0.10.0(Aug 4, 2017)

Add stream.with_compression(false) to disable compression, useful in font objects.
Source code(tar.gz)
Source code(zip)
v0.9.0(May 24, 2017)
Add Document::with_version() to create new document.

Add Document::load_from() to read from any source impls Read.

Add Document::save_to() to write any target impls Write.

Fix some bugs.

Source code(tar.gz)
Source code(zip)
v0.8.0(Mar 16, 2017)
Delete specified objects and pages.

Prune unused objects.

Renumber objects.

Source code(tar.gz)
Source code(zip)
v0.7.0(Mar 7, 2017)
Read and write DateTime value.

Read xref stream in hybrid-reference file.

Add Content::decode() function.

Source code(tar.gz)
Source code(zip)
v0.6.0(Feb 16, 2017)

Read previous Xrefs of linearized or incremental updated document.
Source code(tar.gz)
Source code(zip)
v0.5.0(Feb 10, 2017)

Read xref stream and object streams.
Source code(tar.gz)
Source code(zip)
v0.4.0(Jan 29, 2017)

Decode and encode operations of content streams.
Source code(tar.gz)
Source code(zip)
v0.3.0(Jan 18, 2017)
Use pom instead of nom for parsing.

Able to parse stream object when its Length is a reference object.

Dictionary preserves key insert order.

Source code(tar.gz)
Source code(zip)
v0.2.0(Jan 5, 2017)

Compress/Decompress stream objects.
Source code(tar.gz)
Source code(zip)
v0.1.0(Dec 23, 2016)

Read and write PDF document at low object level.
Source code(tar.gz)
Source code(zip)

Owner

Junfeng Liu

https://toolkit.site/

GitHub

An easy-to-use library for writing PDF in Rust

printpdf printpdf is a library designed for creating printable PDF documents. Crates.io | Documentation [dependencies] printpdf = "0.3.2" Features Cur

592 Jan 8, 2023

Generating PDF files in pure Rust

pdf-canvas A pure rust library for generating PDF files. Currently, simple vector graphics and text set in the 14 built-in fonts are supported. To use

128 Jan 1, 2023

PDF command-line utils written in Rust

rpdf PDF command-line utils written in Rust. rpdf makes working with PDF annotions super easy! It can merge annotations from multiple files, some show

13 May 9, 2023

Generate PDF files with JavaScript and WASM (WebAssembly)

WASM-PDF Generates PDF files directly in the browser with JavaScript and WASM (WebAssembly). Idea here is to push all the work involved in creating a

369 Jan 2, 2023

Converts books written in Markdown to HTML, LaTeX/PDF and EPUB

Crowbook Crowbook's aim is to allow you to write a book in Markdown without worrying about formatting or typography, and let the program generate HTML

567 Dec 29, 2022

PDF Structure Viewer, This tool is useful for when working with PDFs and/or lopdf.

PDF Structure Viewer Inspect how the PDF's structure looks. This tool is useful for when working with PDFs and/or lopdf. This application is used lopd

13 Nov 21, 2022

Image cropper (and colorizer) for pdf scans

Image cropper for personal use (might not work with your pdfs) Requires pdfimages on the path to work properly It's thought just for my workflow so is

2 Nov 7, 2022

Make a PDF file by writing kind of like HTML and CSS.

markup-pdf-rs The Rust library for making a PDF files by writing kind of like HTML and CSS. Inspired by Satori and React-pdf. This library makes a PDF

9 Jan 10, 2023

Easy c̵̰͠r̵̛̠ö̴̪s̶̩̒s̵̭̀-t̶̲͝h̶̯̚r̵̺͐e̷̖̽ḁ̴̍d̶̖̔ ȓ̵͙ė̶͎ḟ̴͙e̸̖͛r̶̖͗ë̶̱́ṉ̵̒ĉ̷̥e̷͚̍ s̷̹͌h̷̲̉a̵̭͋r̷̫̊ḭ̵̊n̷̬͂g̵̦̃ f̶̻̊ơ̵̜ṟ̸̈́ R̵̞̋ù̵̺s̷̖̅ţ̸͗!̸̼͋

Rust S̵̓i̸̓n̵̉ I̴n̴f̶e̸r̵n̷a̴l mutability! Howdy, friendly Rust developer! Ever had a value get m̵̯̅ð̶͊v̴̮̾ê̴̼͘d away right under your nose just when

294 Dec 23, 2022

docx-you-want is a tool to convert a PDF document into a .docx file

ddocx-you-want is a tool to convert a PDF document into a .docx file ... in an unusual way. Since these two formats are inherently differ

45 Dec 23, 2022

A document-code sync tools for document engineering.

Writing A document-code sync tools for document engineering. Writing 是一个自动 “文档-代码” 同步工具。解析 Markdown 中的代码定义，读取目标代码，并嵌入到新的文档中。 Language parse support by

18 Oct 11, 2022

A Rust OpenType manipulation library

fonttools-rs This is an attempt to write an Rust library to read, manipulate and write TTF/OTF files. It is in the early stages of development. Cont

36 Nov 14, 2022

PNG manipulation library.

pngmanip A simple rust library for parsing and manipulating PNG images, primarily at the chunk level. The intended use case was for solving PNG based

1 Jan 7, 2022

A library that allows for the arbitrary inspection and manipulation of the memory and code of a process on a Linux system.

raminspect raminspect is a crate that allows for the inspection and manipulation of the memory and code of a running process on a Linux system. It pro

24 Sep 26, 2023

A Rust library for PDF document manipulation.

Related tags

Overview

lopdf

Example Code

FAQ

Comments

Timezone handling

Parsing of incomplete dates

Releases(v0.20.0)

v0.20.0(Mar 7, 2019)

v0.19.0(Oct 24, 2018)

v0.18.0(Oct 5, 2018)

v0.17.0(Sep 19, 2018)

v0.16.0(Sep 18, 2018)

v0.15.3(Sep 14, 2018)

v0.15.0(Feb 4, 2018)

v0.14.1(Nov 4, 2017)

v0.13.0(Oct 2, 2017)

v0.11.0(Aug 21, 2017)

v0.10.0(Aug 4, 2017)

v0.9.0(May 24, 2017)

v0.8.0(Mar 16, 2017)

v0.7.0(Mar 7, 2017)

v0.6.0(Feb 16, 2017)

v0.5.0(Feb 10, 2017)

v0.4.0(Jan 29, 2017)

v0.3.0(Jan 18, 2017)

v0.2.0(Jan 5, 2017)

v0.1.0(Dec 23, 2016)

Owner

Junfeng Liu

An easy-to-use library for writing PDF in Rust

Generating PDF files in pure Rust

PDF command-line utils written in Rust

Generate PDF files with JavaScript and WASM (WebAssembly)

Converts books written in Markdown to HTML, LaTeX/PDF and EPUB

PDF Structure Viewer, This tool is useful for when working with PDFs and/or lopdf.

Image cropper (and colorizer) for pdf scans

Make a PDF file by writing kind of like HTML and CSS.

Easy c̵̰͠r̵̛̠ö̴̪s̶̩̒s̵̭̀-t̶̲͝h̶̯̚r̵̺͐e̷̖̽ḁ̴̍d̶̖̔ ȓ̵͙ė̶͎ḟ̴͙e̸̖͛r̶̖͗ë̶̱́ṉ̵̒ĉ̷̥e̷͚̍ s̷̹͌h̷̲̉a̵̭͋r̷̫̊ḭ̵̊n̷̬͂g̵̦̃ f̶̻̊ơ̵̜ṟ̸̈́ R̵̞̋ù̵̺s̷̖̅ţ̸͗!̸̼͋

docx-you-want is a tool to convert a PDF document into a .docx file

A document-code sync tools for document engineering.

A Rust OpenType manipulation library

PNG manipulation library.

A library that allows for the arbitrary inspection and manipulation of the memory and code of a process on a Linux system.

An easy-to-use library for writing PDF in Rust

Fast DNA manipulation for Python, written in Rust.

Provide types for angle manipulation in rust.

Sudoku Solver using bitmasks and bit-manipulation with Rust 🦀 and egui 🎨

A fast, powerful, flexible and easy to use open source data analysis and manipulation tool written in Rust

A fast, powerful, flexible and easy to use open source data analysis and manipulation tool written in Rust