Re: APT 2.0 search patterns
On Mon, May 06, 2019 at 07:00:02PM +0200, Julian Andres Klode wrote:
> Hi,
>
> so this below is roughly a first draft at understanding
> the syntax of search patterns of aptitude and integrating
> it into apt. It's very incomplete and needs more work.
I have narrowed the syntax down in order to make things easier
to implement and more uniform, so the parsing first creates
a rough parse tree and then a more usable abstract syntax
tree (which can then be tree-walked).
We only support three forms of syntax now:
pattern = '?' NAME
| '?' NAME '(' pattern (',' pattern)* ','? ')'
| WORD
| QUOTED-WORD
It's kind of a lisp, just with the list head moved to before
the '(', and commas separating arguments.
where
WORD = [0-9a-zA-Z-.*^$\[\]_\\]+
QUOTED-WORD = "[^"]+"
NAME = [0-9a-zA-Z-]+
That said, we don't use a separate tokenizer.
This is parsed into a simple parse tree. The simple parse tree
has a few differences from the grammar above, but see for yourself:
// Handles ?NAME and ?NAME(...)
// Parse tree node for a '?'-introduced pattern with an optional argument list.
struct PatternNode : public Node {
// The NAME following the '?' (the '?' itself is not part of the term).
APT::StringView term;
// The parsed arguments, in source order; empty when no arguments were given.
std::vector<std::unique_ptr<Node>> arguments;
// Writes this node to the stream (the attached parser emits a JSON object).
std::ostream& render(std::ostream& stream) override;
};
// Handles barewords and quoted words
// Parse tree leaf holding a single WORD or QUOTED-WORD.
struct WordNode : public Node {
// The text of the word; for quoted words the surrounding quotes are excluded.
APT::StringView word;
// Meant to distinguish quoted words from barewords; defaults to false.
bool quoted = false;
// Writes this node to the stream (the attached parser emits the word in double quotes).
std::ostream& render(std::ostream& stream) override;
};
The next step then is transforming that parse tree into an
abstract syntax tree. Nothing done on that front yet.
The parser is attached; it parses a given argument and prints out
JSON.
--
debian developer - deb.li/jak | jak-linux.org - free software dev
ubuntu core developer i speak de, en
#include <apt-pkg/string_view.h>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
/// \brief Parser turning a search pattern into a rough parse tree.
///
/// The parser keeps a view of the input (\ref sentence) and a cursor
/// (\ref state) that parse() advances as it consumes characters.
struct PatternTreeParser
{
   /// \brief Base of all parse tree nodes; records the input span a node covers.
   struct Node
   {
      /// \brief Offset of the start of this node
      off_t start = 0;
      /// \brief Offset of the end of this node
      off_t end = 0;

      Node() = default;
      Node(const Node &) = default;
      Node &operator=(const Node &) = default;
      /// Nodes are owned and destroyed through std::unique_ptr<Node>, so the
      /// destructor has to be virtual for derived nodes to be destroyed correctly.
      virtual ~Node() = default;

      /// \brief Throw an Error carrying a copy of this node and \p message.
      void error(std::string message);
      /// \brief Write this node to \p os; the base implementation writes nothing.
      virtual std::ostream &render(std::ostream &os) { return os; }
   };

   /// \brief Exception type thrown on syntax errors.
   struct Error
   {
      /// \brief Span of the input the error refers to (a sliced copy of the node)
      Node location;
      /// \brief Human-readable description of the problem
      std::string message;
   };

   /// \brief Node for `?NAME` and `?NAME(argument, ...)`.
   struct PatternNode : public Node
   {
      /// \brief The NAME following the '?'
      APT::StringView term;
      /// \brief Parsed arguments in source order; empty if there were none
      std::vector<std::unique_ptr<Node>> arguments;
      std::ostream &render(std::ostream &stream) override;
   };

   /// \brief Node for barewords and quoted words.
   struct WordNode : public Node
   {
      /// \brief Text of the word, without surrounding quotes
      APT::StringView word;
      /// \brief Whether the word was written in double quotes
      bool quoted = false;
      std::ostream &render(std::ostream &stream) override;
   };

   /// \brief The input being parsed; the nodes' string views point into it.
   APT::StringView sentence;

   /// \brief Mutable cursor of the parser.
   struct state
   {
      /// \brief Offset into \ref sentence of the next character to consume
      off_t offset = 0;
   } state;

   /// \brief Create a parser reading from \p sentence, starting at offset 0.
   PatternTreeParser(APT::StringView sentence) : sentence(sentence) {}

   /// \brief Parse one pattern at the current offset and return its node.
   std::unique_ptr<Node> parse();
};
std::ostream &PatternTreeParser::PatternNode::render(std::ostream &os)
{
os << "{"
<< "\"term\": \"" << term.to_string() << "\",\n"
<< "\"arguments\": [\n";
for (auto &node : arguments)
node->render(os) << "," << std::endl;
os << "null]\n";
os << "}\n";
return os;
}
/// \brief Write this word to \p os as a JSON string literal.
///
/// Barewords may contain backslashes and quoted words may contain arbitrary
/// characters, so the text is escaped rather than copied verbatim:
/// `"` and `\` get a backslash prefix and control characters are written
/// as `\u00XX`. All other bytes are passed through unchanged.
///
/// \param os stream to write to
/// \return \p os
std::ostream &PatternTreeParser::WordNode::render(std::ostream &os)
{
   static const char hexDigits[] = "0123456789abcdef";

   os << '"';
   for (const char c : word.to_string())
   {
      const unsigned char byte = static_cast<unsigned char>(c);
      if (c == '"' || c == '\\')
         os << '\\' << c;
      else if (byte < 0x20)
         os << "\\u00" << hexDigits[byte >> 4] << hexDigits[byte & 0x0f];
      else
         os << c;
   }
   os << '"';
   return os;
}
void PatternTreeParser::Node::error(std::string message)
{
throw Error{*this, message};
}
std::unique_ptr<PatternTreeParser::Node> PatternTreeParser::parse()
{
auto skipSpace = [this]() -> off_t {
while (sentence[state.offset] == ' ' || sentence[state.offset] == '\t' || sentence[state.offset] == '\r' || sentence[state.offset] == '\n')
state.offset++;
return state.offset;
};
if (sentence[state.offset] == '\0')
{
abort();
}
else if (sentence[state.offset] == '?')
{
state.offset++;
auto node = std::make_unique<PatternNode>();
node->end = node->start = state.offset;
while (APT::StringView("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-").find(sentence[state.offset]) != APT::StringView::npos)
{
node->end = ++state.offset;
}
node->term = sentence.substr(node->start, node->end - node->start);
skipSpace();
// We don't have any arguments, return node;
if (sentence[state.offset] != '(')
return node;
node->end = ++state.offset;
skipSpace();
// Empty argument list, return
if (sentence[state.offset] == ')')
return node;
node->arguments.push_back(parse());
skipSpace();
while (sentence[state.offset] == ',')
{
node->end = ++state.offset;
skipSpace();
node->arguments.push_back(parse());
skipSpace();
}
// Empty argument list, return
if (sentence[state.offset] == ',')
{
node->end = state.offset++;
node->end = skipSpace();
}
if (sentence[state.offset] != ')')
throw Error{*node, "Could not find end of argument list"};
node->end = ++state.offset;
node->end = node->arguments[node->arguments.size() - 1]->end;
return node;
}
else if (sentence[state.offset] == '"')
{
auto node = std::make_unique<WordNode>();
node->end = node->start = state.offset;
state.offset++;
while (sentence[state.offset] != '"' && sentence[state.offset] != '\0')
{
std::cerr << "IN THE QWORD " << sentence[state.offset] << "\n";
node->end++, state.offset++;
}
if (sentence[state.offset] != '"')
throw Error{*node, "Could not find end of string"};
node->end++, state.offset++;
node->word = sentence.substr(node->start + 1, node->end - node->start - 1);
std::cerr << "FOUND QWORD " << node->word.to_string() << " from " << node->start << " to " << node->end << "\n";
return node;
}
else
{
auto node = std::make_unique<WordNode>();
node->end = node->start = state.offset;
while (APT::StringView("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-.*^$[]_\\").find(sentence[state.offset]) != APT::StringView::npos)
{
std::cerr << "IN THE WORD " << sentence[state.offset] << "\n";
state.offset++;
}
node->end = state.offset;
node->word = sentence.substr(node->start, node->end - node->start);
std::cerr << "FOUND WORD " << node->word.to_string() << " from " << node->start << " to " << node->end - 2 << "\n";
return node;
}
}
int main(int argc, char *argv[])
{
PatternTreeParser(argv[1]).parse()->render(std::cout) << "\n";
return 0;
}
Reply to: