/*
 * Copyright 2005, 2006, 2007 Florian Schmitz
 *
 * This file is part of CSSTidy.
 *
 * CSSTidy is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * CSSTidy is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include "csspp_globals.hpp"
using namespace std;

/* Parser states (values of parse_status used below):
 * is    = in selector
 * ip    = in property
 * iv    = in value
 * instr = in string (-> ",',( => ignore } and ; etc.)
 * ic    = in comment (ignore everything)
 * at    = in @-block
 */

extern map<string,string> all_properties,replace_colors;
extern map< string, vector<string> > shorthands;
extern map<string,parse_status> at_rules;

/*
 * Parses a complete stylesheet.
 *
 * The input is walked one character at a time by a state machine whose state
 * is one of the parse_status values listed above. While walking, the parser
 * emits tokens through add_token(), stores finished declarations through
 * add(), and reports problems and optimisations through log(). After the
 * walk, selectors and shorthands are merged according to the
 * "merge_selectors" and "optimise_shorthands" settings.
 *
 * css_input: the stylesheet text (taken by value; it is normalised locally).
 */
void csstidy::parse_css(string css_input)
{
    input_size = css_input.length();
    css_input = str_replace("\r\n","\n",css_input); // Normalise CRLF line endings to LF
    css_input += "\n"; // Guarantees that the last declaration is followed by a newline

    // `from` remembers the state to return to after a string or a comment.
    parse_status status = is, from = is;
    cur_property = "";
    cur_function = "";

    string temp_add,cur_comment,temp;
    vector<string> cur_sub_value_arr;
    vector<string> cur_function_arr; // Stack of nested function calls
    char str_char = 0;               // Character that terminates the current string
    bool str_in_str = false;         // Inside a quoted string nested in url( ... )
    bool invalid_at = false;
    bool pn = false;                 // A new property follows: close the declaration implicitly

    int str_size = css_input.length();
    for(int i = 0; i < str_size; ++i)
    {
        if(css_input[i] == '\n' || css_input[i] == '\r')
        {
            ++line;
        }

        switch(status)
        {
            /* Case in-at-block */
            case at:
            if(is_token(css_input,i))
            {
                if(css_input[i] == '/' && s_at(css_input,i+1) == '*')
                {
                    // Step onto the '*'; the loop increment then lands on the
                    // first character of the comment (same as the other states).
                    status = ic;
                    ++i;
                    from = at;
                }
                else if(css_input[i] == '{')
                {
                    status = is;
                    add_token(AT_START, cur_at);
                }
                else if(css_input[i] == ',')
                {
                    cur_at = trim(cur_at) + ",";
                }
                else if(css_input[i] == '\\')
                {
                    cur_at += unicode(css_input,i);
                }
                else
                {
                    if(!in_char_arr("():/.", css_input[i]))
                    {
                        // Strictly speaking, these are only permitted in @media rules
                        log("Unexpected symbol '" + string(css_input, i, 1) + "' in @-rule", Warning);
                    }
                    cur_at += css_input[i];
                }
            }
            else
            {
                // Skip excess whitespace
                int lastpos = cur_at.length()-1;
                if(lastpos == -1 || !( (ctype_space(cur_at[lastpos]) || (is_token(cur_at,lastpos) && cur_at[lastpos] == ',')) && ctype_space(css_input[i])))
                {
                    cur_at += css_input[i];
                }
            }
            break;

            /* Case in-selector */
            case is:
            if(is_token(css_input,i))
            {
                if(css_input[i] == '/' && s_at(css_input,i+1) == '*' && trim(cur_selector) == "")
                {
                    status = ic;
                    ++i;
                    from = is;
                }
                else if(css_input[i] == '@' && trim(cur_selector) == "")
                {
                    // Check for at-rule
                    invalid_at = true;
                    for(map<string,parse_status>::iterator j = at_rules.begin(); j != at_rules.end(); ++j )
                    {
                        if(strtolower(css_input.substr(i+1,j->first.length())) == j->first)
                        {
                            // Block at-rules are collected in cur_at, all others in cur_selector
                            if(j->second == at)
                            {
                                cur_at = "@" + j->first;
                            }
                            else
                            {
                                cur_selector = "@" + j->first;
                            }
                            status = j->second;
                            i += j->first.length();
                            invalid_at = false;
                        }
                    }

                    if(invalid_at)
                    {
                        cur_selector = "@";
                        string invalid_at_name = "";
                        for(int j = i+1; j < str_size; ++j)
                        {
                            if(!ctype_alpha(css_input[j]))
                            {
                                break;
                            }
                            invalid_at_name += css_input[j];
                        }
                        log("Invalid @-rule: " + invalid_at_name + " (removed)",Warning);
                    }
                }
                else if(css_input[i] == '"' || css_input[i] == '\'')
                {
                    cur_string = css_input[i];
                    status = instr;
                    str_char = css_input[i];
                    from = is;
                }
                else if(invalid_at && css_input[i] == ';')
                {
                    invalid_at = false;
                    status = is;
                }
                else if(css_input[i] == '{')
                {
                    status = ip;
                    add_token(SEL_START, cur_selector);
                    ++selectors;
                }
                else if(css_input[i] == '}')
                {
                    add_token(AT_END, cur_at);
                    cur_at = "";
                    cur_selector = "";
                    sel_separate.clear();
                }
                else if(css_input[i] == ',')
                {
                    cur_selector = trim(cur_selector) + ",";
                    sel_separate.push_back(cur_selector.length());
                }
                else if(css_input[i] == '\\')
                {
                    cur_selector += unicode(css_input,i);
                }
                // remove unnecessary universal selector, FS#147
                else if(!(css_input[i] == '*' && (s_at(css_input,i+1) == '.' || s_at(css_input,i+1) == '[' || s_at(css_input,i+1) == ':' || s_at(css_input,i+1) == '#')))
                {
                    cur_selector += css_input[i];
                }
            }
            else
            {
                // Skip excess whitespace. The lastpos == -1 test keeps an empty
                // selector from being indexed out of range (mirrors the at-block case).
                int lastpos = cur_selector.length()-1;
                if(lastpos == -1 || !( (ctype_space(cur_selector[lastpos]) || (is_token(cur_selector,lastpos) && cur_selector[lastpos] == ',')) && ctype_space(css_input[i])))
                {
                    cur_selector += css_input[i];
                }
            }
            break;

            /* Case in-property */
            case ip:
            if(is_token(css_input,i))
            {
                // IE really accepts =, so csstidy will fix those mistakes
                if(css_input[i] == ':' || (css_input[i] == '=' && cur_property != ""))
                {
                    status = iv;
                    bool valid = !settings["discard_invalid_properties"] ||
                        (all_properties.count(cur_property) > 0 && all_properties[cur_property].find(css_level,0) != string::npos);
                    if(valid)
                    {
                        add_token(PROPERTY, cur_property);
                    }
                }
                else if(css_input[i] == '/' && s_at(css_input,i+1) == '*' && cur_property == "")
                {
                    status = ic;
                    ++i;
                    from = ip;
                }
                else if(css_input[i] == '}')
                {
                    explode_selectors();
                    status = is;
                    invalid_at = false;
                    add_token(SEL_END, cur_selector);
                    cur_selector = "";
                    cur_property = "";
                }
                else if(css_input[i] == ';')
                {
                    cur_property = "";
                }
                else if(css_input[i] == '\\')
                {
                    cur_property += unicode(css_input,i);
                }
                else if(css_input[i] == '*')
                {
                    // IE7 and below recognize properties that begin with '*'
                    if (cur_property == "")
                    {
                        cur_property += css_input[i];
                        log("IE7- hack detected: property name begins with '*'", Warning);
                    }
                }
                else
                {
                    log("Unexpected character '" + string(1, css_input[i]) + "'in property name", Error);
                }
            }
            else if(!ctype_space(css_input[i]))
            {
                if(css_input[i] == '_' && cur_property == "")
                {
                    // IE6 and below recognize properties that begin with '_'
                    log("IE6 hack detected: property name begins with '_'", Warning);
                }
                // TODO: Check for invalid characters
                cur_property += css_input[i];
            }
            // TODO: Check for whitespace inside property names
            break;

            /* Case in-value */
            case iv:
            // The declaration is closed implicitly when a line break is followed
            // by a new property, or when the end of the input is reached.
            pn = (((css_input[i] == '\n' || css_input[i] == '\r') && property_is_next(css_input,i+1)) || i == str_size-1);
            if(pn)
            {
                log("Added semicolon to the end of declaration",Warning);
            }
            if(is_token(css_input,i) || pn)
            {
                if(css_input[i] == '/' && s_at(css_input,i+1) == '*')
                {
                    status = ic;
                    ++i;
                    from = iv;
                }
                else if(css_input[i] == '"' || css_input[i] == '\'' || (css_input[i] == '(' && cur_sub_value == "url") )
                {
                    // url( ... ) is read like a string that is terminated by ')'
                    str_char = (css_input[i] == '(') ? ')' : css_input[i];
                    cur_string = css_input[i];
                    status = instr;
                    from = iv;
                }
                else if(css_input[i] == '(')
                {
                    // function call or an open parenthesis in a calc() expression
                    // url() is a special case that should have been handled above
                    assert(cur_sub_value != "url");

                    // cur_sub_value should contain the name of the function, if any
                    cur_sub_value = trim(cur_sub_value + "(");
                    // set current function name and push it onto the stack
                    cur_function = cur_sub_value;
                    cur_function_arr.push_back(cur_sub_value);
                    cur_sub_value_arr.push_back(cur_sub_value);
                    cur_sub_value = "";
                }
                else if(css_input[i] == '\\')
                {
                    cur_sub_value += unicode(css_input,i);
                }
                else if(css_input[i] == ';' || pn)
                {
                    if(cur_selector.substr(0,1) == "@" && at_rules.count(cur_selector.substr(1)) > 0 && at_rules[cur_selector.substr(1)] == iv)
                    {
                        // Value of a statement at-rule (@charset, @namespace, @import)
                        cur_sub_value_arr.push_back(trim(cur_sub_value));
                        status = is;

                        if(cur_selector == "@charset") charset = cur_sub_value_arr[0];
                        if(cur_selector == "@namespace") namesp = implode(" ",cur_sub_value_arr);
                        if(cur_selector == "@import") import.push_back(build_value(cur_sub_value_arr));

                        cur_sub_value_arr.clear();
                        cur_sub_value = "";
                        cur_selector = "";
                        sel_separate.clear();
                    }
                    else
                    {
                        status = ip;
                    }
                }
                else if (css_input[i] == '!')
                {
                    cur_sub_value = optimise_subvalue(cur_sub_value,cur_property,cur_function);
                    // TODO: can '!' appear inside a function?
                    cur_sub_value_arr.push_back(trim(cur_sub_value));
                    cur_sub_value = "!";
                }
                else if (css_input[i] == ',' || css_input[i] == ')')
                {
                    // optimise and store the current subvalue, if any
                    cur_sub_value = trim(cur_sub_value);
                    if(cur_sub_value != "")
                    {
                        cur_sub_value = optimise_subvalue(cur_sub_value, cur_property, cur_function);
                        cur_sub_value_arr.push_back(cur_sub_value);
                        cur_sub_value = "";
                    }

                    bool drop = false;
                    if (css_input[i] == ')')
                    {
                        if (cur_function_arr.empty())
                        {
                            // No matching open parenthesis, drop this closing one
                            log("Unexpected closing parenthesis, dropping", Warning);
                            drop = true;
                        }
                        else
                        {
                            // Pop function from the stack
                            cur_function_arr.pop_back();
                            cur_function = cur_function_arr.empty() ? "" : cur_function_arr.back();
                        }
                    }
                    if (!drop)
                    {
                        cur_sub_value_arr.push_back(string(1,css_input[i]));
                    }
                }
                else if(css_input[i] != '}')
                {
                    cur_sub_value += css_input[i];
                }

                if( (css_input[i] == '}' || css_input[i] == ';' || pn) && !cur_selector.empty())
                {
                    // End of value: normalize, optimize and store property
                    ++properties;

                    if(cur_at == "")
                    {
                        cur_at = "standard";
                    }

                    // Kill all whitespace
                    cur_at = trim(cur_at);
                    cur_selector = trim(cur_selector);
                    cur_value = trim(cur_value);
                    cur_property = trim(cur_property);
                    cur_sub_value = trim(cur_sub_value);

                    // case settings
                    if(settings["lowercase_s"])
                    {
                        cur_selector = strtolower(cur_selector);
                    }
                    cur_property = strtolower(cur_property);

                    if(cur_sub_value != "")
                    {
                        cur_sub_value = optimise_subvalue(cur_sub_value,cur_property,cur_function);
                        cur_sub_value_arr.push_back(cur_sub_value);
                        cur_sub_value = "";
                    }

                    // Check for leftover open parentheses
                    if (!cur_function_arr.empty())
                    {
                        vector<string>::reverse_iterator rit;
                        for (rit = cur_function_arr.rbegin(); rit != cur_function_arr.rend(); ++rit)
                        {
                            log("Closing parenthesis missing for '" + *rit + "', inserting", Warning);
                            cur_sub_value_arr.push_back(")");
                        }
                        // Every open call has now been closed; reset the stack so
                        // that the next declaration does not inherit it.
                        cur_function_arr.clear();
                        cur_function = "";
                    }

                    cur_value = build_value(cur_sub_value_arr);

                    // Compress !important
                    temp = c_important(cur_value);
                    if(temp != cur_value)
                    {
                        log("Optimised !important",Information);
                    }
                    cur_value = temp;

                    // Optimise shorthand properties
                    if(shorthands.count(cur_property) > 0)
                    {
                        temp = shorthand(cur_value);
                        if(temp != cur_value)
                        {
                            log("Optimised shorthand notation (" + cur_property + "): Changed \"" + cur_value + "\" to \"" + temp + "\"",Information);
                        }
                        cur_value = temp;
                    }

                    // Compress font-weight (tiny compression)
                    if(cur_property == "font-weight" && settings["compress_font-weight"])
                    {
                        // NOTE(review): the two messages look swapped relative to the
                        // returned weight (400 reports bold -> 700) - confirm against
                        // c_font_weight(), which is not defined in this file.
                        int c_fw = c_font_weight(cur_value);
                        if(c_fw == 400)
                        {
                            log("Optimised font-weight: Changed \"bold\" to \"700\"",Information);
                        }
                        else if(c_fw == 700)
                        {
                            log("Optimised font-weight: Changed \"normal\" to \"400\"",Information);
                        }
                    }

                    bool valid = (all_properties.count(cur_property) > 0 && all_properties[cur_property].find(css_level,0) != string::npos);
                    if((!invalid_at || settings["preserve_css"]) && (!settings["discard_invalid_properties"] || valid))
                    {
                        add(cur_at,cur_selector,cur_property,cur_value);
                        add_token(VALUE, cur_value);

                        // Further Optimisation
                        if(cur_property == "background" && settings["optimise_shorthands"] > 1)
                        {
                            map<string,string> temp = dissolve_short_bg(cur_value);
                            css[cur_at][cur_selector].erase("background");
                            for(map<string,string>::iterator it = temp.begin(); it != temp.end(); ++it )
                            {
                                add(cur_at,cur_selector,it->first,it->second);
                            }
                        }
                        if(shorthands.count(cur_property) > 0 && settings["optimise_shorthands"] > 0)
                        {
                            map<string,string> temp = dissolve_4value_shorthands(cur_property,cur_value);
                            for(map<string,string>::iterator it = temp.begin(); it != temp.end(); ++it )
                            {
                                add(cur_at,cur_selector,it->first,it->second);
                            }
                            if(shorthands[cur_property][0] != "0")
                            {
                                css[cur_at][cur_selector].erase(cur_property);
                            }
                        }
                    }
                    if(!valid)
                    {
                        if(settings["discard_invalid_properties"])
                        {
                            log("Removed invalid property: " + cur_property,Warning);
                        }
                        else
                        {
                            log("Invalid property in " + strtoupper(css_level) + ": " + cur_property,Warning);
                        }
                    }

                    //Split multiple selectors here if necessary
                    cur_property = "";
                    cur_sub_value_arr.clear();
                    cur_value = "";
                }
                if(css_input[i] == '}')
                {
                    explode_selectors();
                    add_token(SEL_END, cur_selector);
                    status = is;
                    invalid_at = false;
                    cur_selector = "";
                }
            }
            else if(!pn)
            {
                cur_sub_value += css_input[i];

                // Whitespace ends the current subvalue
                if(ctype_space(css_input[i]))
                {
                    if(trim(cur_sub_value) != "")
                    {
                        cur_sub_value = optimise_subvalue(cur_sub_value, cur_property, cur_function);
                        cur_sub_value_arr.push_back(trim(cur_sub_value));
                    }
                    cur_sub_value = "";
                }
            }
            break;

            /* Case in-string */
            case instr:
            // Inside url( ... ) an unescaped quote opens or closes a nested string
            if(str_char == ')' && (css_input[i] == '"' || css_input[i] == '\'') && !escaped(css_input,i))
            {
                str_in_str = !str_in_str;
            }

            temp_add = "";
            temp_add += css_input[i];
            // A line break is only allowed in a string when it is preceded by an
            // unescaped backslash; otherwise it is replaced by the escape "\A ".
            if( (css_input[i] == '\n' || css_input[i] == '\r') && !(css_input[i-1] == '\\' && !escaped(css_input,i-1)) )
            {
                temp_add = "\\A ";
                log("Fixed incorrect newline in string",Warning);
            }

            // Whitespace inside url( ... ) but outside a nested string is dropped.
            // NOTE(review): "\0xb" is a NUL followed by "xb"; passed as a C string
            // the set ends at the NUL, so only space, \n, \t and \r are matched.
            if (!(str_char == ')' && char2str(css_input[i]).find_first_of(" \n\t\r\0xb") != string::npos && !str_in_str))
            {
                cur_string += temp_add;
            }

            if(css_input[i] == str_char && !escaped(css_input,i) && str_in_str == false)
            {
                status = from;

                if (cur_function == "" && cur_string.find_first_of(" \n\t\r\0xb") == string::npos && cur_property != "content" && from != is)
                {
                    // If the string is not inside a function call, contains no whitespace,
                    // and the current property is not 'content', it may be safe to remove quotes.
                    // TODO: Are there any properties other than 'content' where this is unsafe?
                    // TODO: What if the string contains a comma or slash, and the property is a list or shorthand?
                    if (str_char == '"' || str_char == '\'')
                    {
                        // If the string is in double or single quotes, remove them
                        // FIXME: once url() is handled separately, this may always be the case.
                        cur_string = cur_string.substr(1, cur_string.length() - 2);
                    }
                    else if (cur_string.length() > 3 && (cur_string[1] == '"' || cur_string[1] == '\'')) /* () */
                    {
                        // Keep the parentheses, strip the quotes just inside them
                        cur_string = cur_string[0] + cur_string.substr(2, cur_string.length() - 4) + cur_string[cur_string.length()-1];
                    }
                }

                if(from == iv)
                {
                    cur_sub_value += cur_string;
                }
                else if(from == is)
                {
                    cur_selector += cur_string;
                }
            }
            break;

            /* Case in-comment */
            case ic:
            if(css_input[i] == '*' && s_at(css_input,i+1) == '/')
            {
                status = from;
                ++i;
                add_token(COMMENT, cur_comment);
                cur_comment = "";
            }
            else
            {
                cur_comment += css_input[i];
            }
            break;
        }
    }

    if(settings["merge_selectors"] > 1)
    {
        for(css_struct::iterator i = css.begin(); i != css.end(); ++i )
        {
            merge_selectors(i->second);
        }
    }

    if(settings["optimise_shorthands"] > 0)
    {
        for(css_struct::iterator i = css.begin(); i != css.end(); ++i )
        {
            for(sstore::iterator j = i->second.begin(); j != i->second.end();)
            {
                merge_4value_shorthands(i->first,j->first);
                if(settings["optimise_shorthands"] > 1)
                {
                    merge_bg(j->second);
                }
                if(j->second.size() == 0)
                {
                    // NOTE(review): j is not advanced after erase(j); whether it
                    // may still be used depends on sstore's iterator semantics,
                    // which are not visible in this file - confirm.
                    i->second.erase(j);
                }
                else
                {
                    ++j;
                }
            }
        }
    }
}

/*
 * Optimises a single subvalue of a declaration.
 *
 * The subvalue is trimmed. If the "optimise_numbers" setting is on, it is
 * replaced by the result of compress_numbers() whenever that differs; if the
 * "compress_colors" setting is on, it is then replaced by the result of
 * cut_color() whenever that differs. Each replacement is logged.
 *
 * subvalue: the subvalue to optimise
 * property: name of the property the subvalue belongs to
 * function: name of the enclosing function call, or "" if there is none
 *
 * Returns the optimised subvalue.
 */
string csstidy::optimise_subvalue(string subvalue, const string property, const string function)
{
    subvalue = trim(subvalue);

    string temp = compress_numbers(subvalue,property,function);
    if(temp != subvalue && settings["optimise_numbers"])
    {
        // A longer result is reported as a repair rather than as a saving
        if(temp.length() > subvalue.length())
        {
            log("Fixed invalid number: Changed \"" + subvalue + "\" to \"" + temp + "\"",Warning);
        }
        else
        {
            log("Optimised number: Changed \"" + subvalue + "\" to \"" + temp + "\"",Information);
        }
        subvalue = temp;
    }

    if(settings["compress_colors"])
    {
        temp = cut_color(subvalue);
        if(temp != subvalue)
        {
            if(replace_colors.count(subvalue) > 0)
            {
                log("Fixed invalid color name: Changed \"" + subvalue + "\" to \"" + temp + "\"",Warning);
            }
            else
            {
                log("Optimised color: Changed \"" + subvalue + "\" to \"" + temp + "\"",Information);
            }
            subvalue = temp;
        }
    }
    return subvalue;
}