From 89f562fa8658a183417b694cef7a8b5a05ef1e89 Mon Sep 17 00:00:00 2001 From: Jan-Erik Rediger Date: Fri, 8 Dec 2023 16:41:25 +0100 Subject: [PATCH] init --- .gitignore | 4 + crates/goblin-pyo3/Cargo.lock | 324 ++++++++++++++++++++++++ crates/goblin-pyo3/Cargo.toml | 12 + crates/goblin-pyo3/pyproject.toml | 13 + crates/goblin-pyo3/src/lib.rs | 400 ++++++++++++++++++++++++++++++ crates/goblin-pyo3/test.py | 22 ++ 6 files changed, 775 insertions(+) create mode 100644 .gitignore create mode 100644 crates/goblin-pyo3/Cargo.lock create mode 100644 crates/goblin-pyo3/Cargo.toml create mode 100644 crates/goblin-pyo3/pyproject.toml create mode 100644 crates/goblin-pyo3/src/lib.rs create mode 100644 crates/goblin-pyo3/test.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..65332ff --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.venv +*.so +*.dylib +**/target/ diff --git a/crates/goblin-pyo3/Cargo.lock b/crates/goblin-pyo3/Cargo.lock new file mode 100644 index 0000000..2363b77 --- /dev/null +++ b/crates/goblin-pyo3/Cargo.lock @@ -0,0 +1,324 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "goblin" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f27c1b4369c2cd341b5de549380158b105a04c331be5db9110eef7b6d2742134" +dependencies = [ + "log", + "plain", + "scroll", +] + +[[package]] +name = "goblin-pyo3" +version = "0.1.0" +dependencies = [ + "goblin", + "pyo3", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "indoc" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" + +[[package]] +name = "libc" +version = "0.2.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" + +[[package]] +name = "lock_api" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + +[[package]] +name = "proc-macro2" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04e8453b658fe480c3e70c8ed4e3d3ec33eb74988bd186561b0cc66b85c3bc4b" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "parking_lot", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96fe70b176a89cff78f2fa7b3c930081e163d5379b4dcdf993e3ae29ca662e5" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "214929900fd25e6604661ed9cf349727c8920d47deff196c4e28165a6ef2a96b" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dac53072f717aa1bfa4db832b39de8c875b7c7af4f4a6fe93cdbf9264cf8383b" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7774b5a8282bd4f25f803b1f0d945120be959a36c72e08e7cd031c792fdfd424" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "scroll" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04c565b551bafbef4157586fa379538366e4385d42082f255bfd96e4fe8519da" +dependencies = [ + "scroll_derive", +] + +[[package]] +name = "scroll_derive" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1db149f81d46d2deba7cd3c50772474707729550221e69588478ebf9ada425ae" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "smallvec" +version = "1.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" + +[[package]] +name = "syn" +version = "2.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c39fd04924ca3a864207c66fc2cd7d22d7c016007f9ce846cbb9326331930a" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" diff --git a/crates/goblin-pyo3/Cargo.toml b/crates/goblin-pyo3/Cargo.toml new file mode 100644 index 0000000..e71f158 --- /dev/null +++ b/crates/goblin-pyo3/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "goblin-pyo3" +version = "0.1.0" +edition = "2021" + +[lib] +name = "goblin" +crate-type = ["cdylib"] + +[dependencies] +goblin = "0.7.1" +pyo3 = { version = "0.20.0", features = ["extension-module"] } diff --git a/crates/goblin-pyo3/pyproject.toml b/crates/goblin-pyo3/pyproject.toml new file mode 100644 index 0000000..2dc499f --- /dev/null +++ b/crates/goblin-pyo3/pyproject.toml @@ -0,0 +1,13 @@ +[build-system] +requires = ["maturin>=1,<2"] +build-backend = "maturin" + +[project] +name = "goblin" +requires-python = ">=3.7" +classifiers = [ + "Programming Language :: Rust", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", +] + diff --git a/crates/goblin-pyo3/src/lib.rs b/crates/goblin-pyo3/src/lib.rs new file mode 100644 index 0000000..4ad0289 --- /dev/null +++ b/crates/goblin-pyo3/src/lib.rs @@ -0,0 +1,400 @@ +use std::{ + fs::File, + io::Read, +}; + +use goblin::{ + mach::{symbols::Nlist, Mach}, +}; +use pyo3::{prelude::*, exceptions::PyTypeError}; + +#[pyclass] +struct Object { + len: usize, + ptr: *mut u8, + inner: Option>, +} + +// SAFETY: We only use `ptr` in `drop` to reconstruct a `Vec` +unsafe impl Send for Object {} + +#[pymethods] +impl Object { + #[new] + fn new(path: String) -> Self { + let mut file = File::open(path).unwrap(); + let size = file.metadata().map(|m| m.len() as usize).ok(); + let mut vec = Vec::with_capacity(size.unwrap_or(0)); + file.read_to_end(&mut vec).unwrap(); + + vec.shrink_to_fit(); + let len = vec.len(); + let cap = vec.capacity(); + let ptr = vec.as_mut_ptr(); + assert!(len == cap); + + let obj = vec.leak(); + let object = goblin::Object::parse(obj).unwrap(); + + Self { + len, + ptr, + inner: Some(object), + } + } + + #[getter] + fn header(&self) -> Header { + match self.inner.as_ref().unwrap() { + goblin::Object::Mach(Mach::Binary(macho)) => Header::from(macho.header), + _ => unimplemented!(), + } + } + + #[getter] + fn name(&self) -> Option<&str> { + match self.inner.as_ref().unwrap() { + goblin::Object::Mach(Mach::Binary(macho)) => macho.name.clone(), + _ => unimplemented!(), + } + } + + fn symbols(&self) -> Symbols { + match self.inner.as_ref().unwrap() { + goblin::Object::Mach(Mach::Binary(macho)) => Symbols::from(macho.symbols()), + _ => unimplemented!(), + } + } + + #[getter] + fn libs(&self) -> Vec<&str> { + match self.inner.as_ref().unwrap() { + goblin::Object::Mach(Mach::Binary(macho)) => macho.libs.clone(), + _ => unimplemented!(), + } + } + + #[getter] + fn rpaths(&self) -> Vec<&str> { + match self.inner.as_ref().unwrap() { + goblin::Object::Mach(Mach::Binary(macho)) => macho.rpaths.clone(), + _ => unimplemented!(), + } + } + + fn exports(&self) -> Result, PyErr> { + match self.inner.as_ref().unwrap() { + goblin::Object::Mach(Mach::Binary(macho)) => { + let exports = macho.exports().map_err(|_| PyErr::new::("failed"))?; + Ok(exports.into_iter().map(|exp| exp.into()).collect()) + }, + _ => unimplemented!(), + } + } + + fn imports(&self) -> Result, PyErr> { + match self.inner.as_ref().unwrap() { + goblin::Object::Mach(Mach::Binary(macho)) => { + let imports = macho.imports().map_err(|_| PyErr::new::("failed"))?; + Ok(imports.into_iter().map(|exp| exp.into()).collect()) + }, + _ => unimplemented!(), + } + } +} + +impl Drop for Object { + fn drop(&mut self) { + let obj = self.inner.take(); + drop(obj); + + // SAFETY: + // We took `ptr` and `len` from the vec earlier, + // then leaked it to get a stic reference to it which was only held within `self.inner`, + // which has been dropped above. + unsafe { + let vec = Vec::from_raw_parts(self.ptr, self.len, self.len); + drop(vec); + } + } +} + +#[derive(Debug, Clone)] +#[pyclass] +struct Header { + #[pyo3(get)] + magic: u32, + #[pyo3(get)] + cputype: u32, + #[pyo3(get)] + cpusubtype: u32, + #[pyo3(get)] + filetype: u32, + #[pyo3(get)] + ncmds: usize, + #[pyo3(get)] + sizeofcmds: u32, + #[pyo3(get)] + flags: u32, + #[pyo3(get)] + reserved: u32, +} + +impl From for Header { + fn from(other: goblin::mach::header::Header) -> Self { + Header { + magic: other.magic, + cputype: other.cputype, + cpusubtype: other.cpusubtype, + filetype: other.filetype, + ncmds: other.ncmds, + sizeofcmds: other.sizeofcmds, + flags: other.flags, + reserved: other.reserved, + } + } +} + +#[pymethods] +impl Header { + fn __repr__(&self) -> String { + format!("{:?}", self) + } +} + +#[derive(Debug, Clone)] +#[pyclass] +struct Symbol { + #[pyo3(get)] + name: String, + + meta: Nlist, +} + +#[pymethods] +impl Symbol { + #[getter] + fn typ(&self) -> &'static str { + self.meta.type_str() + } + + #[getter] + fn global(&self) -> bool { + self.meta.is_global() + } + + #[getter] + fn weak(&self) -> bool { + self.meta.is_weak() + } + + #[getter] + fn undefined(&self) -> bool { + self.meta.is_undefined() + } + + #[getter] + fn stab(&self) -> bool { + self.meta.is_stab() + } + + fn __repr__(&self) -> String { + format!( + "Symbol {{ name: {}, global: {}, weak: {}, undefined: {}, stab: {} }}", + self.name, + self.global(), + self.weak(), + self.undefined(), + self.stab() + ) + } +} + +#[pyclass] +struct Symbols { + symbols: Vec, +} + +#[pymethods] +impl Symbols { + fn __iter__(slf: PyRef<'_, Self>) -> PyResult> { + let iter = SymbolIter { + inner: slf.symbols.clone().into_iter(), + }; + Py::new(slf.py(), iter) + } +} + +impl From> for Symbols { + fn from(other: goblin::mach::symbols::SymbolIterator) -> Self { + let symbols = other + .map(|sym| { + let (symname, meta) = sym.unwrap(); + Symbol { + name: symname.to_string(), + meta, + } + }) + .collect(); + + Symbols { symbols } + } +} + +#[pyclass] +struct SymbolIter { + inner: std::vec::IntoIter, +} + +#[pymethods] +impl SymbolIter { + fn __iter__(slf: PyRef<'_, Self>) -> PyRef<'_, Self> { + slf + } + + fn __next__(mut slf: PyRefMut<'_, Self>) -> Option { + slf.inner.next() + } +} + +#[derive(Debug, Default, Clone)] +#[pyclass] +enum ExportTyp { + #[default] + Regular, + Reexport, + Stub +} + +#[derive(Debug, Default, Clone)] +#[pyclass] +pub struct ExportInfo { + #[pyo3(get)] + typ: ExportTyp, + #[pyo3(get)] + address: u64, + #[pyo3(get)] + flags: u64, + #[pyo3(get)] + lib: String, + #[pyo3(get)] + lib_symbol_name: Option, +} + +impl From> for ExportInfo { + fn from(info: goblin::mach::exports::ExportInfo) -> Self { + use goblin::mach::exports::ExportInfo::*; + match info { + Regular { address, flags } => { + Self { + typ: ExportTyp::Regular, + address, + flags, + .. Default::default() + } + } + Reexport { lib, lib_symbol_name, flags } => { + Self { + typ: ExportTyp::Reexport, + lib: lib.to_string(), + lib_symbol_name: lib_symbol_name.map(|s| s.to_string()), + flags, + .. Default::default() + } + } + Stub { + flags, .. + } => { + Self { + typ: ExportTyp::Stub, + flags, + .. Default::default() + } + } + } + } +} + +#[derive(Debug)] +#[pyclass] +struct Export { + #[pyo3(get)] + name: String, + #[pyo3(get)] + info: ExportInfo, + #[pyo3(get)] + size: usize, + #[pyo3(get)] + offset: u64, +} + +#[pymethods] +impl Export { + fn __repr__(&self) -> String { + format!("{:?}", self) + } +} + +impl From> for Export { + fn from(export: goblin::mach::exports::Export) -> Self { + Self { + name: export.name, + info: export.info.into(), + size: export.size, + offset: export.offset, + } + } +} + +#[derive(Debug)] +#[pyclass] +struct Import { + #[pyo3(get)] + name: String, + #[pyo3(get)] + dylib: String, + #[pyo3(get)] + is_lazy: bool, + #[pyo3(get)] + offset: u64, + #[pyo3(get)] + size: usize, + #[pyo3(get)] + address: u64, + #[pyo3(get)] + addend: i64, + #[pyo3(get)] + is_weak: bool, + #[pyo3(get)] + start_of_sequence_offset: u64, +} + +#[pymethods] +impl Import { + fn __repr__(&self) -> String { + format!("{:?}", self) + } +} + +impl From> for Import { + fn from(import: goblin::mach::imports::Import<'_>) -> Self { + Self { + name: import.name.to_string(), + dylib: import.dylib.to_string(), + is_lazy: import.is_lazy, + offset: import.offset, + size: import.size, + address: import.address, + addend: import.addend, + is_weak: import.is_weak, + start_of_sequence_offset: import.start_of_sequence_offset, + } + } +} + +#[pymodule] +#[pyo3(name = "goblin")] +fn py_goblin(_py: Python<'_>, m: &PyModule) -> PyResult<()> { + m.add_class::()?; + Ok(()) +} diff --git a/crates/goblin-pyo3/test.py b/crates/goblin-pyo3/test.py new file mode 100644 index 0000000..d6a8724 --- /dev/null +++ b/crates/goblin-pyo3/test.py @@ -0,0 +1,22 @@ +import goblin + +g = goblin.Object("mylib.dylib") +print(g.header) +print(g.name) + +print("symbols") +for sym in g.symbols(): + print(sym) + break + +print("libs") +print(g.libs) + +print("rpaths") +print(g.rpaths) + +print("exports") +print(len(g.exports())) + +print("imports") +print(len(g.imports()))