ELF loading

the code is completely busted, but at least it seems to work...
This commit is contained in:
2024-03-26 20:35:27 +01:00
parent 7e3dcd7a8b
commit 0c6e4f0c3a
11 changed files with 534 additions and 41 deletions

View File

@@ -4,6 +4,7 @@
#include <cstddef>
#include "BytesFormatter.hpp"
#include "ElfParser.hpp"
#include "LockGuard.hpp"
#include "MemFs.hpp"
#include "MountTable.hpp"
@@ -163,11 +164,15 @@ void ktask_main() {
GlobalTtyManager.all_tty_putstr(saved_modules_names[i]);
GlobalTtyManager.all_tty_putchar('\n');
Task *utask = new Task(Task::TaskMode::TASKMODE_USER, (void (*)()) 0x00020000, saved_modules_names[i]);
assert(saved_modules_size > 0);
utask->_vma->mmap_phys((void *) 0x00020000, (void *) KERN_V2P(saved_modules_data[i]),
max_saved_module_file_size, PAGE_USER | PAGE_RW);
utask->start();
cgistd::vector<char> read_data(max_saved_module_file_size);
memcpy(read_data.begin(), saved_modules_data[i], max_saved_module_file_size);
ElfParser elfParser(read_data);
Task *utask = new Task(Task::TaskMode::TASKMODE_USER, (void (*)()) elfParser.get_entrypoint(), saved_modules_names[i]);
if (elfParser.copy_to(utask))
utask->start();
else
assert2(false, "Init couldn't be loaded!");
}
VFSApi::close(fd);

View File

@@ -15,10 +15,12 @@
#include "misc.hpp"
#include "BytesFormatter.hpp"
#include "ElfParser.hpp"
#include "FDT.hpp"
#include "File.hpp"
#include "memman.hpp"
#include "paging.hpp"
#include "stl/vector"
#include "task.hpp"
#include "timer.hpp"
@@ -169,22 +171,20 @@ uint64_t syscall_execve(const char *pathname, char *const argv[], char *const en
// Just copy for now;
FDT::FD fd = VFSApi::open(StrToPath(pathname));
if (fd == -1) return -1;
File *f = VFSApi::get(fd);
Task *utask = new Task(Task::TaskMode::TASKMODE_USER, (void (*)()) 0x00020000, pathname);
char *mapped = static_cast<char *>(utask->_vma->mmap_mem((void *) 0x00020000, f->size(), 0, PAGE_USER | PAGE_RW));
assert(mapped == (void *) 0x00020000);
char *target = mapped + f->size();
for (; mapped < target; mapped += PAGE_SIZE) {
char buf[PAGE_SIZE];
uint64_t read = f->read(buf, PAGE_SIZE);
memcpy((char *) HHDM_P2V(utask->_addressSpace->virt2real(mapped)), buf, read);
}
File *f = VFSApi::get(fd);
cgistd::vector<char> read_data(f->size());
f->read(read_data.begin(), f->size());
VFSApi::close(fd);
utask->start();
ElfParser elfParser(read_data);
Task *utask = new Task(Task::TaskMode::TASKMODE_USER, (void (*)()) elfParser.get_entrypoint(), pathname);
if (elfParser.copy_to(utask))
utask->start();
else
return -1;
return 0;
}

View File

@@ -16,4 +16,5 @@ target_sources(kernel.elf PRIVATE
)
add_subdirectory(templates)
add_subdirectory(vfs)
add_subdirectory(vfs)
add_subdirectory(elf)

200
src/kernel/Serialize.hpp Normal file
View File

@@ -0,0 +1,200 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#ifndef SEMBACKUP_SERIALIZE_H
#define SEMBACKUP_SERIALIZE_H
#include <cstddef>
#include <memory>
#include <type_traits>
#include <utility>
#include <stl/vector>
//#ifdef __APPLE__
//#include <machine/endian.h>
//#define htobe64(x) htonll(x)
//#define be64toh(x) ntohll(x)
//#else
//#include <endian.h>
//#endif
/// Serialization library
/**
* To serialize the objects in Repository, we have to handle a couple of cases:
* 1. Serializing integers (object ids, etc...)
* 2. Serializing enums (object types)
* 3. Serializing char vectors and strings
* 4. Serializing other STL containers (which also requires serializing pairs)
* 5. Serializing custom structs (including the objects themselves)
*
* With this library it is possible to do all of that.
* One problem is that it isn't really portable, but it can be fixed by changing the std::is_integral<T>::value case to use something like be64toh/htobe64
*
*/
namespace Serialize {
    /// Detects pair-like types: anything that exposes both `.first` and `.second`.
    template<typename, typename = void, typename = void>
    struct is_pair : std::false_type {};

    template<typename P>
    struct is_pair<P, std::void_t<decltype(std::declval<P>().first)>, std::void_t<decltype(std::declval<P>().second)>>
        : std::true_type {};

    /// Detects whether a default-constructed \p T accepts `emplace_back(V)`.
    template<typename, typename, typename = void>
    struct has_emplace_back : std::false_type {};

    template<typename T, typename V>
    struct has_emplace_back<T, V, std::void_t<decltype(T().emplace_back(std::declval<V>()))>> : std::true_type {};

    template<typename, typename = void, typename = void>
    struct serializable : std::false_type {};

    /// Checks if the object has the `serializable` type
    /// In that case, its serialization will be delegated to its .serialize() method,
    /// and deserialization to its T(char vector iterator in, const char vector iterator end) constructor,
    /// similar to Serialize::deserialize
    template<typename T>
    struct serializable<T, std::void_t<decltype(T::serializable::value)>> : std::true_type {};

    /// Deserializes object of type \p T starting from first byte \p in, advances the iterator past the end of object
    /// \tparam T Type to deserialize
    /// \param in Iterator to the first byte of the object
    /// \param end End iterator of source container
    /// \return Deserialized value, or std::nullopt if the input is exhausted or malformed
    template<typename T>
    static std::optional<T> deserialize(cgistd::vector<char>::const_iterator &in, const cgistd::vector<char>::const_iterator &end);

    /// Serializes object of type \p T into vector \p out
    /// \tparam T Type to serialize
    /// \param what Constant reference to the serialized object
    /// \param out Reference to output vector
    template<typename T>
    static void serialize(const T &what, cgistd::vector<char> &out);

    /// Serializes the object of type \p T and returns the resulting vector
    /// \tparam T Type to serialize
    /// \param o Constant reference to the serialized object
    /// \return Serialized data
    template<typename T>
    static cgistd::vector<char> serialize(const T &o);

    /// Deserializes object of type \p T from input vector \p from
    /// \tparam T Type to deserialize
    /// \param from Constant reference to the serialized object
    /// \return Deserialized value, or std::nullopt if the input is exhausted or malformed
    template<typename T>
    static std::optional<T> deserialize(const cgistd::vector<char> &from);

    template<typename T>
    std::optional<T> deserialize(cgistd::vector<char>::const_iterator &in, const cgistd::vector<char>::const_iterator &end) {
        if (in >= end) return std::nullopt;

        if constexpr (serializable<T>::value) {
            // If the object declares itself as serializable, call its constructor with in and end
            return T(in, end);
        } else if constexpr (is_pair<T>::value) {
            // If the object is pair, deserialize the first and second element and return the pair.
            // Each half is an optional: a failed read must abort instead of being forwarded.
            using KT = typename std::remove_const<decltype(T::first)>::type;
            using VT = typename std::remove_const<decltype(T::second)>::type;
            auto K = deserialize<KT>(in, end);
            if (!K) return std::nullopt;
            auto V = deserialize<VT>(in, end);
            if (!V) return std::nullopt;
            return T(std::move(*K), std::move(*V));
        } else if constexpr (std::is_enum<T>::value) {
            // If the object is an enum, deserialize an int and cast it to the enum.
            // The enum is expected to provide an END enumerator marking one past the last valid value.
            auto tmp = deserialize<uint32_t>(in, end);
            if (!tmp || *tmp >= static_cast<uint32_t>(T::END)) return std::nullopt;
            return static_cast<T>(*tmp);
        } else if constexpr (sizeof(T) == 1) {
            // If it's a single byte, just copy it
            if (static_cast<size_t>(std::distance(in, end)) < sizeof(T)) return std::nullopt;
            return static_cast<T>(*(in++));
        } else if constexpr (std::is_integral<T>::value) {
            T tmp;
            // If the object is a number, copy it byte-by-byte (host byte order)
            if (static_cast<size_t>(std::distance(in, end)) < sizeof(tmp)) return std::nullopt;
            std::copy(in, in + sizeof(tmp), reinterpret_cast<char *>(&tmp));
            in += sizeof(tmp);
            return tmp;
        } else {
            // Otherwise we treat it as a container, in format of <number of elements>b<elements>e
            auto size = deserialize<size_t>(in, end);
            if (!size) return std::nullopt;

            auto b = deserialize<char>(in, end);
            if (!b || *b != 'b') return std::nullopt;

            T out;
            if constexpr (sizeof(typename T::value_type) == 1) {
                // Optimization for char vectors
                if (static_cast<size_t>(std::distance(in, end)) < *size) return std::nullopt;
                out.insert(out.end(), in, in + *size);
                in += *size;
            } else {
                using V = typename T::value_type;
                for (size_t i = 0; i < *size; i++) {
                    // A truncated or corrupt element invalidates the whole container
                    auto v = deserialize<V>(in, end);
                    if (!v) return std::nullopt;
                    // Try either emplace_back or emplace if it doesn't exist
                    if constexpr (has_emplace_back<T, V>::value) out.emplace_back(std::move(*v));
                    else
                        out.emplace(std::move(*v));
                }
            }

            b = deserialize<char>(in, end);
            if (!b || *b != 'e') return std::nullopt;
            return out;
        }
    }

    template<typename T>
    void serialize(const T &what, cgistd::vector<char> &out) {
        if constexpr (serializable<T>::value) {
            // If the object declares itself as serializable, call its serialize method
            what.serialize(out);
        } else if constexpr (is_pair<T>::value) {
            // If the object is pair, serialize the first and second element
            serialize(what.first, out);
            serialize(what.second, out);
        } else if constexpr (std::is_enum<T>::value) {
            // If the object is an enum, cast it to an int and serialize that
            serialize(static_cast<uint32_t>(what), out);
        } else if constexpr (sizeof(T) == 1) {
            // If it's a single byte, just copy it
            out.push_back(what);
        } else if constexpr (std::is_integral<T>::value) {
            // If the object is a number, copy it byte-by-byte (host byte order)
            T tmp = static_cast<T>(what);
            out.insert(out.end(), (reinterpret_cast<const char *>(&tmp)),
                       (reinterpret_cast<const char *>(&tmp) + sizeof(tmp)));
        } else {
            // Otherwise we treat it as a container, in format of <number of elements>b<elements>e
            serialize(what.size(), out);
            serialize('b', out);
            if constexpr (sizeof(typename T::value_type) == 1) {
                // Optimization for char vectors
                out.insert(out.end(), what.begin(), what.end());
            } else {
                for (auto const &i: what) { serialize(i, out); }
            }
            serialize('e', out);
        }
    }

    template<typename T>
    cgistd::vector<char> serialize(const T &o) {
        cgistd::vector<char> out;
        serialize(o, out);
        return out;
    }

    template<typename T>
    std::optional<T> deserialize(const cgistd::vector<char> &from) {
        auto bgwr = from.begin();
        return deserialize<T>(bgwr, from.end());
    }
} // namespace Serialize
#endif //SEMBACKUP_SERIALIZE_H

View File

@@ -162,4 +162,8 @@ struct type_mismatch_info {
writestr_no_yield("Warning: division overflow\n");
if (SAN_STOP) _hcf();
}
// Sanitizer hook: prints a "null argument" warning and, when SAN_STOP is set, halts via _hcf().
// NOTE(review): by its name this is the UBSan callback for a null value passed to a
// nonnull-annotated parameter — confirm the expected signature against the compiler runtime.
[[maybe_unused]] void __ubsan_handle_nonnull_arg() {
writestr_no_yield("Warning: null argument\n");
if (SAN_STOP) _hcf();
}
};

View File

@@ -0,0 +1,6 @@
# Add this directory to kernel.elf's include path so its headers can be included by name.
target_include_directories(kernel.elf PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
# Sources from this directory that are compiled into kernel.elf.
target_sources(kernel.elf PRIVATE
ElfParser.cpp
)

View File

@@ -0,0 +1,171 @@
//
// Created by Stepan Usatiuk on 25.03.2024.
//
#include "ElfParser.hpp"
#include "Serialize.hpp"
#include "VMA.hpp"
#include "paging.hpp"
#include "task.hpp"
/// Parses the ELF64 file header and the program header table out of \p data.
///
/// Only little-endian, 64-bit, x86-64 executables are accepted. On any mismatch or
/// truncated input the constructor returns early and the parser stays invalid
/// (copy_to() will then refuse to load anything). On success the whole image is
/// copied into the parser so that copy_to() can read segment contents later.
/// \param data Raw bytes of the ELF file
ElfParser::ElfParser(const cgistd::vector<char> &data) {
    auto       it  = data.begin();
    const auto end = data.end();

    // Reads one field of the destination's own type (host byte order) and stores it;
    // returns false if the input ran out.
    auto read = [&](auto &field) {
        using Field = std::remove_reference_t<decltype(field)>;
        auto val    = Serialize::deserialize<Field>(it, end);
        if (!val) return false;
        field = *val;
        return true;
    };

    // e_ident: magic, class, data encoding
    if (Serialize::deserialize<char>(it, end) != 0x7F) return;
    if (Serialize::deserialize<char>(it, end) != 'E') return;
    if (Serialize::deserialize<char>(it, end) != 'L') return;
    if (Serialize::deserialize<char>(it, end) != 'F') return;
    if (Serialize::deserialize<char>(it, end) != 2) return; // ELFCLASS64
    if (Serialize::deserialize<char>(it, end) != 1) return; // ELFDATA2LSB

    if (!read(_elf_hdr_ver)) return; // e_ident[EI_VERSION]
    if (!read(_abi)) return;         // e_ident[EI_OSABI]

    // Skip the remaining 8 bytes of e_ident (ABI version + padding)
    if (std::distance(it, end) < 8) return;
    std::advance(it, 8);

    if (Serialize::deserialize<uint16_t>(it, end) != 2) return;    // e_type: ET_EXEC
    if (Serialize::deserialize<uint16_t>(it, end) != 0x3E) return; // e_machine: EM_X86_64

    // e_version is a 32-bit field and belongs in _elf_ver; it must not overwrite the
    // identification version that was read from e_ident above.
    if (auto val = Serialize::deserialize<uint32_t>(it, end))
        _elf_ver = *val;
    else
        return;

    if (!read(_entrypoint)) return;       // e_entry
    if (!read(_phdrs_pos)) return;        // e_phoff
    if (!read(_sects_pos)) return;        // e_shoff
    if (!read(_flags)) return;            // e_flags
    if (!read(_hdr_size)) return;         // e_ehsize
    if (!read(_hdrs_entry_size)) return;  // e_phentsize
    if (!read(_hdrs_num)) return;         // e_phnum
    if (!read(_sects_entry_size)) return; // e_shentsize
    if (!read(_sects_num)) return;        // e_shnum
    if (!read(_sects_name_idx)) return;   // e_shstrndx

    // The program header offset comes straight from the file: refuse to move the
    // iterator outside of the buffer.
    const auto data_size = static_cast<uintptr_t>(std::distance(data.begin(), end));
    if (_phdrs_pos > data_size) return;

    auto hdr_begin = data.begin();
    std::advance(hdr_begin, _phdrs_pos);

    for (int i = 0; i < _hdrs_num; i++) {
        auto hdr = Serialize::deserialize<Elf64_Phdr>(hdr_begin, end);
        // deserialize yields an empty optional once the table runs off the end of the file,
        // so the optional itself has to be checked before looking at the header.
        if (!hdr || !hdr->valid) return;
        _headers.push_back(*hdr);
    }

    _data  = data;
    _valid = true;
}
/// Maps every non-empty PT_LOAD segment into the address space of \p task and fills it
/// with the segment's file contents, zeroing the part beyond p_filesz.
/// \param task Task whose VMA/address space receives the segments
/// \return false if the parser is invalid, a segment refers to bytes outside of the file,
///         or a segment could not be mapped at its requested address; true otherwise.
///         Segments mapped before a failure are left in place.
bool ElfParser::copy_to(Task *task) {
    if (!_valid) return false;

    const auto file_size = static_cast<uint64_t>(std::distance(_data.begin(), _data.end()));

    for (const auto &hdr: _headers) {
        if (hdr.p_type != 1 /*PT_LOAD*/) continue;

        // For some reason, if a segment is empty it disregards its supposed position in the linker script
        // and everything breaks
        if (hdr.p_memsz == 0) continue;

        // Number of bytes that actually come from the file; the rest of the segment is zero-filled.
        const uint64_t copy_len = hdr.p_filesz < hdr.p_memsz ? hdr.p_filesz : hdr.p_memsz;
        // Offsets and sizes come from the file itself: never read past the end of the image.
        if (hdr.p_offset > file_size || copy_len > file_size - hdr.p_offset) return false;

        uint32_t flags = 0;
        if (hdr.p_flags & 0x1 /*eXecute*/) {
            // TODO:
        }
        if (hdr.p_flags & 0x2 /*Write*/) {
            flags |= PAGE_RW;
        }

        // Map whole pages: start at the page containing p_vaddr and extend the length accordingly
        auto rounded_vaddr = hdr.p_vaddr & 0x000FFFFFFFFFF000ULL;
        auto real_memsz    = hdr.p_memsz + (hdr.p_vaddr - rounded_vaddr);

        uintptr_t real_ptr = reinterpret_cast<uintptr_t>(task->_vma->mmap_mem(reinterpret_cast<void *>(rounded_vaddr), real_memsz, 0, flags | PAGE_USER));
        if (real_ptr != rounded_vaddr) return false;

        auto file_ptr = _data.begin();
        std::advance(file_ptr, hdr.p_offset);

        char *task_mem = reinterpret_cast<char *>(real_ptr + (hdr.p_vaddr - rounded_vaddr));

        for (size_t i = 0; i < hdr.p_memsz; i++, task_mem++) {
            // The task's pages are written through the kernel's view of the backing frame:
            // translate the page of the current byte, then add the offset inside the page.
            char *dst = (char *) HHDM_P2V(task->_addressSpace->virt2real(
                                                  reinterpret_cast<void *>((uintptr_t) task_mem & 0x000FFFFFFFFFF000ULL)) +
                                          ((uintptr_t) task_mem & 0xFFF));
            if (i >= copy_len) {
                *dst = 0;
            } else {
                *dst = *(file_ptr + i);
            }
        }
    }
    return true;
}
/// Reads one ELF64 program header from \p in, advancing the iterator field by field.
/// Fields are read in file order; reading stops at the first field that cannot be
/// deserialized, in which case `valid` stays false and the remaining fields are not assigned.
/// \param in  Iterator to the first byte of the header, advanced past every field read
/// \param end End iterator of the source buffer
ElfParser::Elf64_Phdr::Elf64_Phdr(cgistd::vector<char>::const_iterator &in, cgistd::vector<char>::const_iterator const &end) {
    // Deserializes a value of the member's own type into it; reports whether that worked.
    auto pull = [&](auto &member) {
        using Member = std::remove_reference_t<decltype(member)>;
        auto parsed  = Serialize::deserialize<Member>(in, end);
        if (!parsed) return false;
        member = *parsed;
        return true;
    };

    // Short-circuit evaluation keeps the original "stop at the first failure" behaviour.
    valid = pull(p_type) &&
            pull(p_flags) &&
            pull(p_offset) &&
            pull(p_vaddr) &&
            pull(p_paddr) &&
            pull(p_filesz) &&
            pull(p_memsz) &&
            pull(p_align);
}

View File

@@ -0,0 +1,64 @@
//
// Created by Stepan Usatiuk on 25.03.2024.
//
#ifndef OS2_ELFPARSER_HPP
#define OS2_ELFPARSER_HPP
#include "stl/vector"
class Task;
// Just copying everything for now
/// Parser and loader for ELF64 executables.
///
/// The constructor parses the file header and the program headers and keeps a copy of the
/// whole image; copy_to() then maps the loadable segments into a task.
class ElfParser {
public:
    /// Parses \p data; the parser is left invalid if the image is not accepted.
    ElfParser(const cgistd::vector<char> &data);

    /// Loads the parsed segments into \p task.
    /// \return false if parsing failed earlier or a segment could not be loaded
    bool copy_to(Task *task);

    /// \return Entry point address from the ELF header, or 0 if it was never parsed
    uintptr_t get_entrypoint() const { return _entrypoint; }

private:
    // Every field is zero-initialized so that a rejected image never exposes
    // indeterminate values (e.g. through get_entrypoint()).
    bool      _valid            = false;
    uint8_t   _elf_hdr_ver      = 0; // e_ident[EI_VERSION]
    uint8_t   _abi              = 0; // e_ident[EI_OSABI]
    uint32_t  _elf_ver          = 0; // e_version (a 32-bit field in the file)
    uintptr_t _entrypoint       = 0; // e_entry
    uintptr_t _phdrs_pos        = 0; // e_phoff
    uintptr_t _sects_pos        = 0; // e_shoff
    uint32_t  _flags            = 0; // e_flags
    uint16_t  _hdr_size         = 0; // e_ehsize
    uint16_t  _hdrs_entry_size  = 0; // e_phentsize
    uint16_t  _hdrs_num         = 0; // e_phnum
    uint16_t  _sects_entry_size = 0; // e_shentsize
    uint16_t  _sects_num        = 0; // e_shnum
    uint16_t  _sects_name_idx   = 0; // e_shstrndx

    using Elf64_Addr   = uintptr_t;
    using Elf64_Off    = size_t;
    using Elf64_Half   = uint16_t;
    using Elf64_Word   = uint32_t;
    using Elf64_Sword  = int32_t;
    using Elf64_Xword  = uint64_t;
    using Elf64_Sxword = int64_t;

    /// One entry of the program header table.
    struct Elf64_Phdr {
        Elf64_Word  p_type   = 0; /* Type of segment */
        Elf64_Word  p_flags  = 0; /* Segment attributes */
        Elf64_Off   p_offset = 0; /* Offset in file */
        Elf64_Addr  p_vaddr  = 0; /* Virtual address in memory */
        Elf64_Addr  p_paddr  = 0; /* Reserved */
        Elf64_Xword p_filesz = 0; /* Size of segment in file */
        Elf64_Xword p_memsz  = 0; /* Size of segment in memory */
        Elf64_Xword p_align  = 0; /* Alignment of segment */

        /// Set only when every field above was read successfully
        bool valid = false;

        /// Marks the type for Serialize: deserialization goes through the constructor below
        using serializable = std::true_type;

        Elf64_Phdr(cgistd::vector<char>::const_iterator &in, const cgistd::vector<char>::const_iterator &end);
    };

    cgistd::vector<Elf64_Phdr> _headers; // Program headers in file order
    cgistd::vector<char>       _data;    // Copy of the whole ELF image
};
#endif //OS2_ELFPARSER_HPP

View File

@@ -58,6 +58,9 @@ inline void _Destroy(_Tp* __pointer) {
__pointer->~_Tp();
}
template <class _Tp>
inline void destroy(_Tp* __pointer);
template <class _ForwardIterator>
void
__destroy_aux(_ForwardIterator __first, _ForwardIterator __last, __false_type)

View File

@@ -1,7 +1,15 @@
#include "syscalls_interface.h"
volatile char asdfasdf[323];
volatile int x = 3;
volatile int w = 0;
volatile const char *hello = "hello xd";
__attribute__((unused)) void _start() {
if (x == 3) putchar('x');
if (w == 2) putchar('w');
if (asdfasdf[0] == '\0') putchar('a');
putchar('h');
putchar('i');
putchar('\n');

View File

@@ -1,27 +1,58 @@
OUTPUT_FORMAT("binary")
OUTPUT_FORMAT("elf64-x86-64")
OUTPUT_ARCH(i386:x86-64)
ENTRY(_start)
phys = 0x00020000;
PHDRS
{
text PT_LOAD FLAGS((1 << 0) | (1 << 2)) ; /* Execute + Read */
rodata PT_LOAD FLAGS((1 << 2)) ; /* Read only */
data PT_LOAD FLAGS((1 << 1) | (1 << 2)) ; /* Write + Read */
dynamic PT_DYNAMIC FLAGS((1 << 1) | (1 << 2)) ; /* Dynamic PHDR for relocations */
}
SECTIONS
{
.text phys : AT(phys) {
code = .;
*(.text)
*(.rodata)
. = ALIGN(4096);
}
.data : AT(phys + (data - code))
{
data = .;
*(.data)
. = ALIGN(4096);
}
.bss : AT(phys + (bss - code))
{
bss = .;
*(.bss)
. = ALIGN(4096);
}
end = .;
}
. = 0x00020000;
.text : {
*(.text .text.*)
} :text
/* Move to the next memory page for .rodata */
. += CONSTANT(MAXPAGESIZE);
.rodata : {
ctors_begin = .;
*(.ctors)
*(.init_array)
ctors_end = .;
*(.rodata .rodata.*)
} :rodata
/* Move to the next memory page for .data */
. += CONSTANT(MAXPAGESIZE);
.data : {
*(.data .data.*)
} :data
/* Dynamic section for relocations, both in its own PHDR and inside data PHDR */
.dynamic : {
*(.dynamic)
} :data :dynamic
/* NOTE: .bss needs to be the last thing mapped to :data, otherwise lots of */
/* unnecessary zeros will be written to the binary. */
/* If you need, for example, .init_array and .fini_array, those should be placed */
/* above this. */
.bss : {
*(.bss .bss.*)
*(COMMON)
} :data
/* Discard .note.* and .eh_frame since they may cause issues on some hosts. */
/DISCARD/ : {
*(.eh_frame)
*(.note .note.*)
}}