[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: APT 2.0 search patterns



On Mon, May 06, 2019 at 07:00:02PM +0200, Julian Andres Klode wrote:
> Hi,
> 
> so this below is roughly a first draft at understanding
> the syntax of search patterns of aptitude and integrating
> it into apt. It's very incomplete and needs more work.

I have narrowed the syntax down in order to make things easier
to implement and more uniform, so the parsing first creates
a rough parse tree and then a more usable abstract syntax
tree (which can then be tree-walked).

We only support three forms of syntax now:

	pattern = '?' NAME
	        | '?' NAME '(' pattern (',' pattern)* ','? ')'
	        | WORD
		| QUOTED-WORD

It's kind of a lisp, just with the list head moved to before
the '(', and commas separating arguments.

where

	WORD = [0-9a-zA-Z-.*^$\[\]_\\]+
	QUOTED-WORD = "[^"]+"
	NAME = [0-9a-zA-Z-]+

That said, we don't use a separate tokenizer.

This is parsed into a simple parse tree. The simple parse tree
has a few differences from the grammar above, but see for yourself:

	// Handles ?NAME and ?NAME(...): a named pattern with optional arguments
	struct PatternNode : public Node {
		// The NAME that follows the '?'
		APT::StringView term;
		// Parsed arguments in input order; empty for a bare ?NAME
		std::vector<std::unique_ptr<Node>> arguments;

		// Writes the node as a JSON object in the attached parser
		std::ostream& render(std::ostream& stream) override;
	};

	// Handles barewords and quoted words
	struct WordNode : public Node {
		// Text of the word; for quoted words, the text between the quotes
		APT::StringView word;
		// Presumably marks words written in double quotes; defaults to false
		bool quoted = false;
		// Writes the word as a double-quoted string in the attached parser
		std::ostream& render(std::ostream& stream) override;
	};

The next step then is transforming that parse tree into an
abstract syntax tree. Nothing done on that front yet.

Parser is attached, it parses a given argument and prints out
JSON.

-- 
debian developer - deb.li/jak | jak-linux.org - free software dev
ubuntu core developer                              i speak de, en
#include <apt-pkg/string_view.h>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

/// \brief Parser turning a search pattern into a rough parse tree.
///
/// The tree consists of PatternNode (for ?NAME and ?NAME(...)) and WordNode
/// (for barewords and quoted words) objects owned through
/// std::unique_ptr<Node>.
struct PatternTreeParser
{

   /// \brief Common base of all parse tree nodes; records the covered span.
   struct Node
   {
      /// \brief Offset of the start of this node
      off_t start = 0;
      /// \brief Offset of the end of this node
      off_t end = 0;

      Node() = default;
      // Error stores a Node by value, so plain nodes must stay copyable even
      // though a destructor is declared below.
      Node(const Node &) = default;
      Node &operator=(const Node &) = default;
      // Derived nodes are destroyed through std::unique_ptr<Node>; without a
      // virtual destructor that is undefined behaviour and the members of
      // the derived node (e.g. PatternNode::arguments) are never released.
      virtual ~Node() = default;

      /// \brief Throw an Error located at this node carrying \p message.
      void error(std::string message);
      /// \brief Write this node to \p os; the base implementation writes nothing.
      /// \return \p os
      virtual std::ostream &render(std::ostream &os) { return os; }
   };

   /// \brief Exception type thrown for syntax errors.
   struct Error
   {
      /// \brief Copy of the span (start/end) of the node the error refers to
      Node location;
      /// \brief Human readable description of the problem
      std::string message;
   };

   /// \brief Node for ?NAME and ?NAME(...)
   struct PatternNode : public Node
   {
      /// \brief The NAME following the '?'
      APT::StringView term;
      /// \brief Parsed arguments in input order; empty if none were given
      std::vector<std::unique_ptr<Node>> arguments;

      /// \brief Write the node and its arguments to \p stream as JSON.
      std::ostream &render(std::ostream &stream) override;
   };

   /// \brief Node for barewords and quoted words
   struct WordNode : public Node
   {
      /// \brief Text of the word; for quoted words the text between the quotes
      APT::StringView word;
      /// \brief Meant to flag words given in double quotes; defaults to false
      bool quoted = false;
      /// \brief Write the word to \p stream as a double-quoted string.
      std::ostream &render(std::ostream &stream) override;
   };

   /// \brief The input being parsed, held as an APT::StringView
   APT::StringView sentence;
   /// \brief Mutable parser position
   struct state
   {
      /// \brief Offset into sentence of the next character to look at
      off_t offset = 0;
   } state;

   /// \brief Create a parser for \p sentence, positioned at offset 0.
   ///
   /// Deliberately left implicit so existing callers converting from a
   /// string keep compiling.
   PatternTreeParser(APT::StringView sentence) : sentence(sentence) {}

   /// \brief Parse one pattern starting at state.offset and advance past it.
   /// \return the root of the parsed (sub)tree
   std::unique_ptr<Node> parse();
};

/// \brief Dump this pattern node, and recursively its arguments, as JSON.
///
/// Every argument is followed by a comma; the array is then closed with a
/// literal null element so that no trailing comma is left behind.
///
/// \param os stream to write to
/// \return \p os
std::ostream &PatternTreeParser::PatternNode::render(std::ostream &os)
{
   os << "{";
   os << "\"term\": \"" << term.to_string() << "\",\n";
   os << "\"arguments\": [\n";
   for (auto child = arguments.begin(); child != arguments.end(); ++child)
      (*child)->render(os) << "," << std::endl;
   os << "null]\n"
      << "}\n";
   return os;
}
/// \brief Write \p text to \p os as a JSON string literal, quotes included.
///
/// Escapes '"', '\\' and all control characters below 0x20, which JSON does
/// not allow unescaped inside a string.
static std::ostream &renderJsonString(std::ostream &os, const std::string &text)
{
   static const char hexDigits[] = "0123456789abcdef";
   os << '"';
   for (const char c : text)
   {
      const unsigned char byte = static_cast<unsigned char>(c);
      switch (c)
      {
      case '"':
         os << "\\\"";
         break;
      case '\\':
         os << "\\\\";
         break;
      case '\n':
         os << "\\n";
         break;
      case '\r':
         os << "\\r";
         break;
      case '\t':
         os << "\\t";
         break;
      default:
         if (byte < 0x20)
            os << "\\u00" << hexDigits[byte >> 4] << hexDigits[byte & 0x0f];
         else
            os << c;
         break;
      }
   }
   return os << '"';
}

/// \brief Write the word as a JSON string.
///
/// Barewords may contain backslashes and quoted words may contain arbitrary
/// characters, so the text is escaped rather than copied verbatim; otherwise
/// an input such as foo\.bar would produce invalid JSON.
///
/// \param os stream to write to
/// \return \p os
std::ostream &PatternTreeParser::WordNode::render(std::ostream &os)
{
   return renderJsonString(os, word.to_string());
}

void PatternTreeParser::Node::error(std::string message)
{
   throw Error{*this, message};
}

std::unique_ptr<PatternTreeParser::Node> PatternTreeParser::parse()
{
   auto skipSpace = [this]() -> off_t {
      while (sentence[state.offset] == ' ' || sentence[state.offset] == '\t' || sentence[state.offset] == '\r' || sentence[state.offset] == '\n')
	 state.offset++;
      return state.offset;
   };

   if (sentence[state.offset] == '\0')
   {
      abort();
   }
   else if (sentence[state.offset] == '?')
   {
      state.offset++;
      auto node = std::make_unique<PatternNode>();
      node->end = node->start = state.offset;
      while (APT::StringView("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-").find(sentence[state.offset]) != APT::StringView::npos)
      {
	 node->end = ++state.offset;
      }

      node->term = sentence.substr(node->start, node->end - node->start);

      skipSpace();
      // We don't have any arguments, return node;
      if (sentence[state.offset] != '(')
	 return node;
      node->end = ++state.offset;
      skipSpace();

      // Empty argument list, return
      if (sentence[state.offset] == ')')
	 return node;

      node->arguments.push_back(parse());
      skipSpace();
      while (sentence[state.offset] == ',')
      {
	 node->end = ++state.offset;
	 skipSpace();
	 node->arguments.push_back(parse());
	 skipSpace();
      }
      // Empty argument list, return
      if (sentence[state.offset] == ',')
      {
	 node->end = state.offset++;
	 node->end = skipSpace();
      }

      if (sentence[state.offset] != ')')
	 throw Error{*node, "Could not find end of argument list"};

      node->end = ++state.offset;
      node->end = node->arguments[node->arguments.size() - 1]->end;
      return node;
   }
   else if (sentence[state.offset] == '"')
   {
      auto node = std::make_unique<WordNode>();
      node->end = node->start = state.offset;

      state.offset++;
      while (sentence[state.offset] != '"' && sentence[state.offset] != '\0')
      {
	 std::cerr << "IN THE QWORD " << sentence[state.offset] << "\n";
	 node->end++, state.offset++;
      }
      if (sentence[state.offset] != '"')
	 throw Error{*node, "Could not find end of string"};
      node->end++, state.offset++;

      node->word = sentence.substr(node->start + 1, node->end - node->start - 1);
      std::cerr << "FOUND QWORD " << node->word.to_string() << " from " << node->start << " to " << node->end << "\n";

      return node;
   }
   else
   {
      auto node = std::make_unique<WordNode>();
      node->end = node->start = state.offset;

      while (APT::StringView("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-.*^$[]_\\").find(sentence[state.offset]) != APT::StringView::npos)
      {
	 std::cerr << "IN THE WORD " << sentence[state.offset] << "\n";
	 state.offset++;
      }

      node->end = state.offset;
      node->word = sentence.substr(node->start, node->end - node->start);
      std::cerr << "FOUND WORD " << node->word.to_string() << " from " << node->start << " to " << node->end - 2 << "\n";
      return node;
   }
}

int main(int argc, char *argv[])
{

   PatternTreeParser(argv[1]).parse()->render(std::cout) << "\n";
   return 0;
}

Reply to: