Working with C code

“Hello world”

Here’s a simple “hello world” C program:


#include <stdio.h>

/* Program entry point: prints a fixed greeting via printf.
   argc and argv are not used, and there is no explicit return statement. */
int main(int argc, char *argv[])
{
    printf("Hello, python\n");
}

Here’s a Python script that locates the function during one pass of the compilation and prints various interesting things about it:

import gcc

# Here's a callback.  We will wire it up below:
def on_pass_execution(p, fn):
    """Print details of a function when it reaches one particular pass.

    Registered below as the callback for gcc.PLUGIN_PASS_EXECUTION.

    p  -- the pass being executed; only its ``name`` is examined
    fn -- for the pass of interest, an instance of gcc.Function

    Every pass other than '*warn_function_return' is ignored.
    """
    # This pass is called fairly early on, per-function, after the
    # CFG has been built:
    if p.name != '*warn_function_return':
        return

    # For this pass, "fn" will be an instance of gcc.Function:
    print('fn: %r' % fn)
    print('fn.decl.name: %r' % fn.decl.name)

    # fn.decl is an instance of gcc.FunctionDecl:
    print('return type: %r' % str(fn.decl.type.type))
    print('argument types: %r' % [str(t) for t in fn.decl.type.argument_types])

    # These checks document the CFG shape expected for the sample
    # program: an entry block, an exit block and one block of code.
    assert isinstance(fn.cfg, gcc.Cfg) # None for some early passes
    assert len(fn.cfg.basic_blocks) == 3
    assert fn.cfg.basic_blocks[0] == fn.cfg.entry
    assert fn.cfg.basic_blocks[1] == fn.cfg.exit
    bb = fn.cfg.basic_blocks[2]
    for i, stmt in enumerate(bb.gimple):
        print('gimple[%i]:' % i)
        print('  str(stmt): %r' % str(stmt))
        print('  repr(stmt): %r' % repr(stmt))
        if isinstance(stmt, gcc.GimpleCall):
            print('  type(stmt.fn): %r' % type(stmt.fn))
            print('  str(stmt.fn): %r' % str(stmt.fn))
            # A separate index name keeps the statement index "i" intact:
            for argnum, arg in enumerate(stmt.args):
                print('  str(stmt.args[%i]): %r' % (argnum, str(arg)))
            print('  str(stmt.lhs): %s' % str(stmt.lhs))

# Wire up our callback: on_pass_execution is registered for the
# PLUGIN_PASS_EXECUTION event and filters on the pass name itself:
gcc.register_callback(gcc.PLUGIN_PASS_EXECUTION,
                      on_pass_execution)

We can run the script during the compile like this:

./gcc-with-python script.py test.c

Here’s the expected output:

fn: gcc.Function('main')
fn.decl.name: 'main'
return type: 'int'
argument types: ['int', 'char * *']
gimple[0]:
  str(stmt): '__builtin_puts (&"Hello, python"[0]);'
  repr(stmt): 'gcc.GimpleCall()'
  type(stmt.fn): <type 'gcc.AddrExpr'>
  str(stmt.fn): '__builtin_puts'
  str(stmt.args[0]): '&"Hello, python"[0]'
  str(stmt.lhs): None
gimple[1]:
  str(stmt): 'return;'
  repr(stmt): 'gcc.GimpleReturn()'

Notice how the call to printf has already been optimized into a call to __builtin_puts.

Spell-checking string constants within source code

This example adds a spell-checker pass to GCC: all string constants are run through the “enchant” spelling-checker:

$ ./gcc-with-python tests/examples/spelling-checker/script.py input.c

The Python code for this is:


import gcc

# Use the Python bindings to the "enchant" spellchecker:
import enchant
spellingdict = enchant.Dict("en_US")

class SpellcheckingPass(gcc.GimplePass):
    """GIMPLE pass that spell-checks string constants found in each function."""

    def execute(self, fun):
        """Walk every statement of *fun*, spell-checking each node visited.

        This is called per-function during compilation.
        """
        for block in fun.cfg.basic_blocks:
            statements = block.gimple
            if not statements:
                continue
            for statement in statements:
                statement.walk_tree(self.spellcheck_node, statement.loc)

    def spellcheck_node(self, node, loc):
        """Warn at *loc* about each unrecognised word in a string constant.

        Nodes that are not gcc.StringCst instances are ignored.
        """
        # Only textual constants are of interest:
        if not isinstance(node, gcc.StringCst):
            return

        for candidate in node.constant.split():
            if spellingdict.check(candidate):
                continue

            # Warn about the spelling error (controlling the warning
            # with the -Wall command-line option):
            message = 'Possibly misspelt word in string constant: %r' % candidate
            emitted = gcc.warning(loc, message, gcc.Option('-Wall'))
            if not emitted:
                continue

            # The warning was not suppressed at the command line, so
            # follow it up with suggested respellings, if there are any:
            respellings = spellingdict.suggest(candidate)
            if respellings:
                gcc.inform(loc, 'Suggested respellings: %r' % ', '.join(respellings))

# Create an instance of the pass, named 'spellchecker', and register it
# relative to the existing 'cfg' pass:
ps = SpellcheckingPass(name='spellchecker')
ps.register_after('cfg')

Given this sample C source file:


#include <stdio.h>

int main(int argc, char *argv[])
{
    const char *p = argc ? "correctly spelled" : "not so korectly speled";

    printf("The quick brown fox jumps over the lazy dog\n");

    printf("Ths s n xmple f spllng mstke\n");
}

these warnings are emitted on stderr:

tests/examples/spelling-checker/input.c: In function 'main':
tests/examples/spelling-checker/input.c:24:48: warning: Possibly misspelt word in string constant: 'korectly' [-Wall]
tests/examples/spelling-checker/input.c:24:48: note: Suggested respellings: 'correctly'
tests/examples/spelling-checker/input.c:24:48: warning: Possibly misspelt word in string constant: 'speled' [-Wall]
tests/examples/spelling-checker/input.c:24:48: note: Suggested respellings: 'speed, spieled, spelled, spewed, speckled, peeled, sped'
tests/examples/spelling-checker/input.c:28:11: warning: Possibly misspelt word in string constant: 'Ths' [-Wall]
tests/examples/spelling-checker/input.c:28:11: note: Suggested respellings: "Th, Th's, Ohs, Thu, TVs, T's, Th s, Ts, This, Thus, The, Tho, Tbs, Thy, Goths"
tests/examples/spelling-checker/input.c:28:11: warning: Possibly misspelt word in string constant: 'xmple' [-Wall]
tests/examples/spelling-checker/input.c:28:11: note: Suggested respellings: 'ample'
tests/examples/spelling-checker/input.c:28:11: warning: Possibly misspelt word in string constant: 'spllng' [-Wall]
tests/examples/spelling-checker/input.c:28:11: note: Suggested respellings: 'spelling'
tests/examples/spelling-checker/input.c:28:11: warning: Possibly misspelt word in string constant: 'mstke' [-Wall]
tests/examples/spelling-checker/input.c:28:11: note: Suggested respellings: 'mistake'

Finding global variables

This example adds a pass that warns about uses of global variables:

$ ./gcc-with-python \
    tests/examples/find-global-state/script.py \
    -c \
    tests/examples/find-global-state/input.c

The Python code for this is:

import gcc
from gccutils import get_src_for_loc

# Set to a true value to print details of each variable and type
# examined, plus the final set of global declarations:
DEBUG=0

def is_const(type_):
    """Return True if *type_* is const, or is an array of a const type.

    Arrays are checked recursively through their element type, so that
    e.g. ``const char []`` counts as const.  Returns False otherwise.
    """
    if DEBUG:
        type_.debug()

    # Tolerate type objects that have no "const" attribute at all:
    if getattr(type_, 'const', False):
        return True

    # Don't bother warning about an array of const e.g.
    # const char []
    if isinstance(type_, gcc.ArrayType):
        return is_const(type_.dereference)

    # Explicit result, so that every path returns a bool:
    return False


class StateFinder:
    """Collects uses of non-const global variables and reports them."""

    def __init__(self):
        """Gather the declarations of all variables holding "global" state."""
        # Uses found so far, as (location, declaration) pairs:
        self.state_users = set()

        self.global_decls = set()
        for variable in gcc.get_variables():
            decl = variable.decl
            decl_type = decl.type

            if DEBUG:
                print('var.decl: %r' % decl)
                print(decl_type)

            # Const data is not worth warning about:
            if not is_const(decl_type):
                self.global_decls.add(decl)

        if DEBUG:
            print('self.global_decls: %r' % self.global_decls)

    def find_state_users(self, node, loc):
        """Record (loc, node) if *node* is one of the known global variables."""
        is_global = isinstance(node, gcc.VarDecl) and node in self.global_decls
        if is_global:
            # Keep the use for later replay rather than reporting it now:
            # the set eliminates duplicates (e.g. two references to "q"
            # in "q += p") and lets flush() report in source-location order.
            self.state_users.add((loc, node))

    def flush(self):
        """Emit one note per recorded use, sorted by source location."""
        by_location = sorted(self.state_users, key=lambda use: use[0])
        for where, decl in by_location:
            text = 'use of global state "%s %s" here' % (decl.type, decl)
            gcc.inform(where, text)

def on_pass_execution(p, fn):
    """Report uses of global state when the '*free_lang_data' pass runs.

    Every other pass is ignored.  *fn* is not used: the functions to
    scan are taken from the callgraph instead.
    """
    if p.name != '*free_lang_data':
        return

    finder = StateFinder()

    # Locate uses of such variables, skipping anything that has no
    # function body, no CFG or no statements:
    for cg_node in gcc.get_callgraph_nodes():
        function = cg_node.decl.function
        if not function:
            continue
        graph = function.cfg
        if not graph:
            continue
        for block in graph.basic_blocks:
            statements = block.gimple
            if not statements:
                continue
            for statement in statements:
                statement.walk_tree(finder.find_state_users,
                                    statement.loc)

    # Flush the data that was found:
    finder.flush()

# Wire up our callback for the PLUGIN_PASS_EXECUTION event; the callback
# itself filters on the pass name:
gcc.register_callback(gcc.PLUGIN_PASS_EXECUTION,
                      on_pass_execution)

Given this sample C source file:


#include <stdio.h>

/* File-scope variables used as sample global state. */

/* Not referenced by any of the functions below: */
static int a_global;

/* A global whose type is an anonymous struct; read by test6(): */
struct {
  int f;
} bar;

/* Declared here, defined elsewhere; read by test4(): */
extern int foo;

/* Uses only its parameter and a local variable; returns j * 4 + 1. */
int test(int j)
{
  /* A local variable, which should *not* be reported: */
  int i;
  i = j * 4;
  return i + 1;
}

/* Adds p to the function-local static q and returns p * q.
   q is mentioned on two separate lines of the body. */
int test2(int p)
{
  static int q = 0;
  q += p;
  return p * q;
}

/* Prints the file name, line number and function name.
   The parameter k is unused and there is no return statement. */
int test3(int k)
{
  /* We should *not* report about __FUNCTION__ here: */
  printf("%s:%i:%s\n", __FILE__, __LINE__, __FUNCTION__);
}

/* Returns the value of the extern global foo. */
int test4()
{
  return foo;
}

/* Returns field f of the global struct instance bar. */
int test6()
{
  return bar.f;
}

/* A named struct type with a single int field. */
struct banana {
  int f;
};

/* A const-qualified global of that type; read by test7(): */
const struct banana a_banana;

/* Returns field f of the const global a_banana. */
int test7()
{
  return a_banana.f;
}

these warnings are emitted on stderr:

tests/examples/find-global-state/input.c:41:nn: note: use of global state "int q" here
tests/examples/find-global-state/input.c:41:nn: note: use of global state "int q" here
tests/examples/find-global-state/input.c:42:nn: note: use of global state "int q" here
tests/examples/find-global-state/input.c:53:nn: note: use of global state "int foo" here
tests/examples/find-global-state/input.c:58:nn: note: use of global state "struct 
{
  int f;
} bar" here