The shunting yard algorithm can convert an infix expression into a postfix expression, which can be easily converted into a binary expression tree.
Infix notation (or infix notation) is a common method of representing arithmetic or logical formulas, where operators are placed in the middle of operands (e.g., 3 + 4). Compared to prefix notation (e.g., + 3 4) or postfix notation (e.g., 3 4 +), infix notation is not easy to parse by computers, but it is still used in many programming languages because it conforms to common usage.
— Wikipedia
Algorithm#
Text Version#
- While there are still tokens to be read:
- Read a token.
- If the token is a number, add it to the output queue.
- If the token is a function, push it onto the operator stack.
- If the token is a function argument separator (e.g., a comma):
- Until the top of the stack is a left parenthesis, pop operators off the stack onto the output queue. If no left parenthesis is encountered, either the separator was misplaced or parentheses were mismatched.
- If the token is an operator, o1, then:
- While there is an operator, o2, at the top of the stack, and
if o1 is left-associative and its precedence is less than or equal to that of o2, or
if o1 is right-associative and its precedence is less than that of o2,
pop o2 off the stack onto the output queue (repeat this step until the above condition is not satisfied); - Then push o1 onto the stack.
- While there is an operator, o2, at the top of the stack, and
- If the token is a left parenthesis, then push it onto the stack.
- If the token is a right parenthesis:
- Until the top of the stack is a left parenthesis, pop operators off the stack onto the output queue.
- Pop the left parenthesis from the stack, but not onto the output queue.
- If the token at the top of the stack is a function, pop it off and onto the output queue.
- If the stack runs out without finding a left parenthesis, then there are mismatched parentheses in the expression.
- While there are still operators on the stack:
- If the operator at the top of the stack is a parenthesis, then there are mismatched parentheses in the expression.
- Pop the operator off the stack onto the output queue.
- Exit algorithm.
State Machine#
I have summarized the above text into a state machine. To simplify, I did not consider the case of expression errors.
Functions can be like sin(x)
, tan(x)
, etc.
Quoting an example from Wikipedia#
We input 3 + 4 * 2 / (1 - 5) ^ (2 ^ 3)
Input | Action | Output Queue | Operator Stack | Note |
---|---|---|---|---|
3 | Add to output queue | 3 | ||
+ | Push to operator stack | 3 | + | |
4 | Add to output queue | 3 4 | + | |
* | Push to operator stack | 3 4 | * + | * has higher precedence than + |
2 | Add to output queue | 3 4 2 | * + | |
/ | Pop from stack to queue | 3 4 2 * | + | / and * have the same precedence |
/ | Push to operator stack | 3 4 2 * | / + | / has higher precedence than + |
( | Push to operator stack | 3 4 2 * | ( / + | |
1 | Add to output queue | 3 4 2 * 1 | ( / + | |
- | Push to operator stack | 3 4 2 * 1 | - ( / + | |
5 | Add to output queue | 3 4 2 * 1 5 | - ( / + | |
) | Pop from stack to queue | 3 4 2 * 1 5 - | ( / + | Loop until ( is found |
) | Pop from stack | 3 4 2 * 1 5 - | / + | Parentheses match |
^ | Push to operator stack | 3 4 2 * 1 5 - | ^ / + | ^ has higher precedence than / |
2 | Add to output queue | 3 4 2 * 1 5 - 2 | ^ / + | |
^ | Push to operator stack | 3 4 2 * 1 5 - 2 | ^ ^ / + | ^ is right associative |
3 | Add to output queue | 3 4 2 * 1 5 - 2 3 | ^ ^ / + | |
END | Pop all operators from stack to queue | 3 4 2 * 1 5 - 2 3 ^ ^ / + |
CPP Implementation#
After understanding the algorithm above, the implementation should be straightforward. The following code does not implement functions, only numbers and operators. But with the structure in place, implementing a function should not be difficult.
#include <iostream>
#include <string>
#include <stack>
#include <vector>
#include <stdexcept>
#include <cmath>
#define is_operator(c) (c == '+' || c == '-' || c == '/' || c == '*' || c == '%' || c == '^')
#define is_left_operator(c) (c == '+')
#define is_others(c) (c == '(' || c == ')')
using namespace std;
// Parse tree structure
struct ParseTree{
string data; // Data stored
int result = -1; // Result of subtree calculation
// int diff = -1; // Might be used for automatic differentiation, not used here
struct ParseTree* left = nullptr;
struct ParseTree* right = nullptr;
};
// Operator stack
stack<char> op_stack;
// Parse tree stack
stack<ParseTree*> parse_stack;
// Output queue
vector<string> o_vector;
// Convert character to integer
int char2int(char c){
return c - '0';
}
// Initialize global variables
void initVariables() {
while (!op_stack.empty()) {
op_stack.pop();
}
while (!parse_stack.empty()) {
parse_stack.pop();
}
o_vector.clear();
}
// + == - < * == / == % < ^
// 1 1 2 2 2
// Get the priority of an operator
int getPriority(char op1) {
if (op1 == '(' || op1 == ')')
return 0;
if (op1 == '+' || op1 == '-')
return 1;
if (op1 == '*' || op1 == '/' || op1 == '%')
return 2;
if (op1 == '^')
return 3;
throw invalid_argument("Received invalid operator");
}
// Get the type of a character
enum CharType {Number, Function, Operator, Default, Others};
CharType getType(char c) {
CharType type = Default;
if ('0' <= c && c <= '9') {
type = Number;
}
if (is_operator(c)) {
type = Operator;
}
if (is_others(c)) {
type = Others;
}
return type;
}
// Move the top of the operator stack to the output queue
void move2queue() {
if (op_stack.empty()) {
throw invalid_argument("Cannot pop an empty stack");
}
char top = op_stack.top();
string temp(1, top);
o_vector.push_back(temp);
op_stack.pop();
}
// Push an accumulator number to the output queue
void push_accu(int accu) {
if (accu < 0)
return;
string temp = to_string(accu);
o_vector.push_back(temp);
}
// Shunting yard algorithm
bool shunting_yard(string input){
int length = (int)input.length();
// This flag is used to mark whether it is a continuous number, continuous numbers should be merged into one number.
bool flagContinuousNumber = false;
int accumulator = -1;
// Begin Loop
for (int i = 0; i < length; i++){
CharType type = getType(input[i]);
if (type == Default){
flagContinuousNumber = false;
push_accu(accumulator);
accumulator = -1;
continue;
}
if (type == Number) {
if (flagContinuousNumber) { // last one is a number
accumulator = accumulator * 10 + char2int(input[i]);
} else { // last one is not a number, but this one is.
flagContinuousNumber = true;
accumulator = char2int(input[i]);
}
continue;
} else {
flagContinuousNumber = false;
push_accu(accumulator);
accumulator = -1;
}
if (type == Operator) {
while(!op_stack.empty()) {
if (getPriority(input[i]) <= getPriority(op_stack.top())) {
move2queue();
} else {
break;
}
}
op_stack.push(input[i]);
continue;
}
if (type == Others) {
if (input[i] == '(') {
op_stack.push(input[i]);
}
if (input[i] == ')') {
while (op_stack.top() != '(') {
move2queue();
}
op_stack.pop();
}
continue;
}
}
if (flagContinuousNumber)
push_accu(accumulator);
while (!op_stack.empty()) {
char top = op_stack.top();
if (top == '(' || top == ')') {
return false;
}
string temp(1, top);
o_vector.push_back(temp);
op_stack.pop();
}
return true;
}
int main(){
initVariables();
// const string input = "(1+2) * (3 * (4+5))"; // 81
const string input = "4+ 2 ^ 3 ^ 2 + 4 / 2"; // 516
shunting_yard(input);
return 0;
}