プログラミングの問題、分かりますか？ (ID:55930)

名前

ホームページ(ブログ、Twitterなど)のURL (省略可)

本文

# 元投稿に対するレスではありません。

テーマとしては面白いものなので'自己研鑽'のために書いてみました。
「関数」ではなく、「'('の直前の識別子」をカウントします。
完璧にはほど遠いですが、コメントの削除、プリプロセッサコマンドの分離、文字列リテラルの判定、トークン分割といった基本的な処理はひととおり含まれています。
ご批判下さい > 諸賢

# １か月半ほど前にいくつかのBBSで見かけた
# 「標準入力からテキストファイルを読み込み行を短い順に出力せよ」
# という課題と出題傾向が良く似ていますね。
# 複数の学生が見境なくあちこちに投稿して聞きまくるのに、自分(達)の努
# 力の結果については何も報告しないという学生の'タチ'も似ています。
# 90%以上の確率で同じ学校の同じ教師による出題だと思います。
# きちんと指導して下さいね > 教師サマ

// exam.
// C/C++言語のソースコードにおける'('の直前の識別子と出現度数を，度数の降順で出力するプログラム例
// コメント，プリプロセッサコマンド，文字列リテラル内はカウントしない

#include <string>
#include <vector>
#include <map>
#include <algorithm>
#include <functional>
#include <sstream>
#include <iostream>

// Characters that the tokenizer treats as punctuation: each one ends the identifier
// being accumulated and is emitted as a token of its own.
const std::string SYMBOLS( "+-*/%<=>!&|^~?:.,;(){}[]" );

// Two-character operators that the tokenizer keeps together as a single token.
const std::string OPERATORS[] = {
    "+=", "++",
    "-=", "--", "->",
    "*=",
    "/=",
    "%=",
    "<=", "<<",
    "==",
    ">=", ">>",
    "!=",
    "&=", "&&",
    "|=", "||",
    "^=",
    "::",
};

// Returns the index (0-based, counted from the start of str) of the first occurrence of tag
// at or after position s that lies outside a "..." string literal, or std::string::npos
// when there is no such occurrence.
//
// - Quote tracking starts at s, so s is taken to be outside any string literal.
// - A double quote preceded by an odd number of consecutive backslashes is escaped and does
//   not open or close a literal.
// - Only double quotes are recognised; a character literal such as '"' is not special-cased
//   and therefore counts as an opening quote.
// - When a literal is still open at the end of str, nothing from its opening quote onward is
//   searched and npos is returned (the previous implementation indexed past the end of its
//   quote table in that situation).
std::string::size_type find_tag( const std::string& str, std::string::size_type s, const std::string& tag )
{
    bool in_literal = false;
    int back_slash = 0;     // length of the run of backslashes immediately before pos
    std::string::size_type pos = s;
    for( ; pos < str.size(); ++pos ){
        const char ch = str[pos];
        if( ch == '\"' && back_slash % 2 == 0 ){
            // An unescaped quote only switches the literal state; it is never a match position.
            in_literal = !in_literal;
        }
        else if( !in_literal && str.compare( pos, tag.size(), tag ) == 0 ){
            return pos;
        }
        back_slash = ch == '\\' ? back_slash + 1 : 0;
    }
    // Nothing matched inside the scanned range. The trailing find keeps the result for an
    // empty tag or an out-of-range s identical to what std::string::find reports.
    return in_literal ? std::string::npos : str.find( tag, pos );
}

// Reads C/C++ source text from istrm and returns it as a single string from which comments
// and preprocessor directive lines have been removed. Every surviving line is followed by
// one space instead of its newline.
//
// Known limitation: "//" is stripped line by line before block comments are recognised, so
// a "//" that appears inside a /* ... */ comment truncates that line and can remove the
// closing "*/" with it.
std::string read_source( std::istream& istrm )
{
    // '\r' is included so that CRLF input does not leave a carriage return glued to the
    // end of each line (it would hide a trailing backslash and end up inside tokens).
    const char* const BLANKS = " \t\r";
    std::string output;

    // Pass 1: per line, trim blanks, drop "// ..." comments and join lines ending in '\'.
    for( std::string line; std::getline( istrm, line ); ){
        const std::string::size_type first = line.find_first_not_of( BLANKS );
        if( first == std::string::npos ){
            continue;   // blank line
        }
        line = line.substr( first, line.find_last_not_of( BLANKS ) - first + 1 );

        const std::string::size_type comment = find_tag( line, 0, "//" );
        if( comment == 0 ){
            continue;   // the whole line is a comment
        }
        if( comment != std::string::npos ){
            line.erase( comment );
            // line[0] is not blank here, so find_last_not_of cannot return npos.
            line.erase( line.find_last_not_of( BLANKS ) + 1 );
        }

        if( line[line.size() - 1] == '\\' ){
            // Line continuation: drop the backslash and stay on the same logical line.
            line.erase( line.size() - 1 );
            output += line;
            output += ' ';
        }
        else{
            output += line;
            output += '\n';
        }
    }

    // Pass 2: replace every "/* ... */" comment by one space so that the tokens on either
    // side of it are not fused together.
    for( std::string::size_type beg = 0; ( beg = find_tag( output, beg, "/*" ) ) != std::string::npos; ){
        // Quotes have no meaning inside a comment, so the terminator is searched literally.
        const std::string::size_type end = output.find( "*/", beg + 2 );
        if( end == std::string::npos ){
            output.erase( beg );    // unterminated comment: discard the rest of the text
            break;
        }
        output.replace( beg, end + 2 - beg, 1, ' ' );
    }

    // Pass 3: drop preprocessor directive lines and join the remaining lines with spaces.
    std::istringstream iss( output );
    output.clear();
    for( std::string line; std::getline( iss, line ); ){
        // Look at the first non-blank character: comment removal can leave a line empty
        // or put blanks in front of the '#'.
        const std::string::size_type first = line.find_first_not_of( BLANKS );
        if( first == std::string::npos || line[first] != '#' ){
            output += line;
            output += ' ';
        }
    }
    return output;
}

// Reports whether the two characters, taken in this order, spell one of the entries of OPERATORS.
bool is_operator( char first, char second )
{
    std::string candidate( 1, first );
    candidate.push_back( second );

    const std::string* const last = OPERATORS + sizeof( OPERATORS ) / sizeof( OPERATORS[0] );
    for( const std::string* op = OPERATORS; op != last; ++op ){
        if( *op == candidate ){
            return true;
        }
    }
    return false;
}

// Splits src into tokens (identifiers, keywords, operators and other punctuation) and
// stores them in dest, replacing whatever dest held before.
//
// - src is expected to be text already processed by read_source.
// - Blanks (space, tab, newline) separate tokens and are not stored.
// - Every character of SYMBOLS is a token of its own, except that the two-character
//   operators accepted by is_operator are kept together.
// - String and character literals are not split: the quotes and everything between them are
//   appended to the token being built.
void tokenize_source( const std::string& src, std::vector< std::string >& dest )
{
    dest.clear();
    std::string tok;
    bool in_dquot = false;  // inside "..."
    bool in_squot = false;  // inside '...'
    int back_slash = 0;     // length of the run of backslashes immediately before pos
    for( std::string::size_type pos = 0; pos < src.size(); ++pos ){
        if( in_dquot || in_squot ){
            tok += src[pos];
        }
        else if( src[pos] == ' ' || src[pos] == '\t' || src[pos] == '\n' ){
            if( !tok.empty() ){
                dest.push_back( tok );
                tok.clear();
            }
        }
        else if( SYMBOLS.find( src[pos] ) != std::string::npos ){
            if( !tok.empty() ){
                dest.push_back( tok );
                tok.clear();
            }
            if( pos + 1 < src.size() && is_operator( src[pos], src[pos+1] ) ){
                tok += src[pos++];
            }
            tok += src[pos];
            dest.push_back( tok );
            tok.clear();
        }
        else{
            tok += src[pos];
        }

        // An unescaped quote opens or closes a literal, but only when it is not itself inside
        // the other kind of literal. Without the !in_squot guard the '"' character literal
        // switched in_dquot on and left both flags set.
        if( back_slash % 2 == 0 ){
            if( src[pos] == '\"' && !in_squot ){
                in_dquot = !in_dquot;
            }
            else if( src[pos] == '\'' && !in_dquot ){
                in_squot = !in_squot;
            }
        }
        back_slash = src[pos] == '\\' ? back_slash + 1 : 0;
    }

    // Keep the last token when src does not end with a blank or a symbol.
    if( !tok.empty() ){
        dest.push_back( tok );
    }
}

// --------------------------------------------------------------------------------------------------
// Application
// C/C++言語のソースコードにおける'('の直前の識別子と出現度数を，度数の降順で出力する．
// コメント，プリプロセッサコマンド，文字列リテラル内はカウントしない

// Words that may be followed by '(' but are never counted as identifiers.
const char* KEYWORDS[] =
    {
        // statements
        "if", "for", "while", "switch", "return", "catch",
        // keyword operators
        "sizeof", "typeid", "throw", "new", "delete",
        // built-in type names
        "void", "bool", "char", "wchar_t", "short", "int", "long",
        "unsigned", "signed", "float", "double",
    };

// An identifier together with the number of times it was counted.
class TokenFreq
{
public:
    TokenFreq() : name_(), freq_( 0 ){}
    TokenFreq( const std::string& name, const int& freq ) : name_( name ), freq_( freq ){}

    std::string name() const { return name_; }
    int freq() const { return freq_; }

    // Ordering for an ascending sort by count; equal counts are ordered by name.
    bool operator < ( const TokenFreq& rhs ) const
    {
        if( freq_ != rhs.freq_ ){
            return freq_ < rhs.freq_;
        }
        return name_ < rhs.name_;
    }

    // Ordering for a descending sort by count; equal counts are still ordered by
    // ascending name, so this is not simply the reverse of operator <.
    bool operator > ( const TokenFreq& rhs ) const
    {
        if( freq_ != rhs.freq_ ){
            return freq_ > rhs.freq_;
        }
        return name_ < rhs.name_;
    }

private:
    std::string name_; // identifier
    int freq_;         // occurrence count
};

// Reads source text from istrm and fills result with one TokenFreq per distinct token that
// appears immediately in front of a "(" token, together with its number of occurrences.
//
// - A token is skipped when its last character is one of SYMBOLS or when it is listed in
//   KEYWORDS.
// - result is always replaced; it is left empty when nothing qualifies.
// - Entries are produced in the iteration order of the std::map used for counting, i.e.
//   ascending by name.
void collect_token_front_of_paren( std::istream& istrm, std::vector< TokenFreq >& result )
{
    // Reset the output first so that stale entries never survive empty input.
    result.clear();

    std::vector< std::string > tokens;
    tokenize_source( read_source( istrm ), tokens );

    // Search the keyword table in place instead of copying it into a vector on every call.
    const char* const* const kw_begin = KEYWORDS;
    const char* const* const kw_end = KEYWORDS + sizeof( KEYWORDS ) / sizeof( KEYWORDS[0] );

    std::map< std::string, int > token_to_freq;
    // "i + 1 < size" also covers an empty token list without a separate check.
    for( std::vector< std::string >::size_type i = 0; i + 1 < tokens.size(); ++i ){
        const std::string& candidate = tokens[i];
        if( tokens[i+1] == "("
            && !candidate.empty()
            && SYMBOLS.find( *candidate.rbegin() ) == std::string::npos
            && std::find( kw_begin, kw_end, candidate ) == kw_end ){
            ++token_to_freq[candidate];
        }
    }

    for( std::map< std::string, int >::const_iterator it = token_to_freq.begin(); it != token_to_freq.end(); ++it ){
        result.push_back( TokenFreq( it->first, it->second ) );
    }
}

int main()
{
    std::vector<TokenFreq > result;
    collect_token_front_of_paren( std::cin, result );
    std::sort( result.begin(), result.end(), std::greater< TokenFreq >() );
    for( std::vector< TokenFreq >::const_iterator it = result.begin(); it != result.end(); ++it ){
        std::cout << it->name() << " : " << it->freq() << std::endl;
    }
}

←解決時は質問者本人がここをチェックしてください。

戻る